mirror of
https://git.eden-emu.dev/archive/citron
synced 2026-03-31 00:18:30 -04:00
Merge pull request 'video_core: Add ETC2 texture compression format support' (#80) from feature/etc2-texture-compression-support into main
Reviewed-on: https://git.citron-emu.org/Citron/Emulator/pulls/80
This commit is contained in:
@@ -231,6 +231,12 @@ struct FormatTuple {
|
||||
{VK_FORMAT_ASTC_6x5_UNORM_BLOCK}, // ASTC_2D_6X5_UNORM
|
||||
{VK_FORMAT_ASTC_6x5_SRGB_BLOCK}, // ASTC_2D_6X5_SRGB
|
||||
{VK_FORMAT_E5B9G9R9_UFLOAT_PACK32}, // E5B9G9R9_FLOAT
|
||||
{VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK}, // ETC2_RGB_UNORM
|
||||
{VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK}, // ETC2_RGBA_UNORM
|
||||
{VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK}, // ETC2_RGB_PTA_UNORM
|
||||
{VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK}, // ETC2_RGB_SRGB
|
||||
{VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK}, // ETC2_RGBA_SRGB
|
||||
{VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK}, // ETC2_RGB_PTA_SRGB
|
||||
|
||||
// Depth formats
|
||||
{VK_FORMAT_D32_SFLOAT, Attachable}, // D32_FLOAT
|
||||
@@ -299,6 +305,15 @@ FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with
|
||||
tuple.format = VK_FORMAT_A8B8G8R8_UNORM_PACK32;
|
||||
}
|
||||
}
|
||||
// Transcode on hardware that doesn't support ETC2 natively (shouldn't happen on Vulkan 1.0)
|
||||
if (!device.IsOptimalEtc2Supported() && VideoCore::Surface::IsPixelFormatETC2(pixel_format)) {
|
||||
const bool is_srgb = with_srgb && VideoCore::Surface::IsPixelFormatSRGB(pixel_format);
|
||||
if (is_srgb) {
|
||||
tuple.format = VK_FORMAT_A8B8G8R8_SRGB_PACK32;
|
||||
} else {
|
||||
tuple.format = VK_FORMAT_A8B8G8R8_UNORM_PACK32;
|
||||
}
|
||||
}
|
||||
const bool attachable = (tuple.usage & Attachable) != 0;
|
||||
const bool storage = (tuple.usage & Storage) != 0;
|
||||
|
||||
|
||||
@@ -36,7 +36,7 @@ VkFormat GetFormat(const Tegra::FramebufferConfig& framebuffer) {
|
||||
switch (framebuffer.pixel_format) {
|
||||
case Service::android::PixelFormat::Rgba8888:
|
||||
case Service::android::PixelFormat::Rgbx8888:
|
||||
return VK_FORMAT_R8G8B8A8_UNORM;
|
||||
return VK_FORMAT_A8B8G8R8_UNORM_PACK32;
|
||||
case Service::android::PixelFormat::Rgb565:
|
||||
return VK_FORMAT_R5G6B5_UNORM_PACK16;
|
||||
case Service::android::PixelFormat::Bgra8888:
|
||||
@@ -44,7 +44,7 @@ VkFormat GetFormat(const Tegra::FramebufferConfig& framebuffer) {
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}",
|
||||
static_cast<u32>(framebuffer.pixel_format));
|
||||
return VK_FORMAT_R8G8B8A8_UNORM;
|
||||
return VK_FORMAT_A8B8G8R8_UNORM_PACK32;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -284,43 +284,19 @@ void Layer::UpdateRawImage(const Tegra::FramebufferConfig& framebuffer, size_t i
|
||||
const DAddr framebuffer_addr = framebuffer.address + framebuffer.offset;
|
||||
const u8* const host_ptr = device_memory.GetPointer<u8>(framebuffer_addr);
|
||||
|
||||
// Calculate appropriate block height based on texture format and size
|
||||
// This is critical for proper texture swizzling
|
||||
// TODO(Rodrigo): Read this from HLE
|
||||
constexpr u32 block_height_log2 = 4;
|
||||
const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer);
|
||||
u32 block_height_log2 = 4; // Default for most formats
|
||||
|
||||
// Adjust block height for specific formats that cause corruption
|
||||
if (framebuffer.pixel_format == Service::android::PixelFormat::Rgb565) {
|
||||
block_height_log2 = 3; // RGB565 needs smaller block height
|
||||
} else if (framebuffer.width <= 256 && framebuffer.height <= 256) {
|
||||
block_height_log2 = 3; // Smaller textures need smaller blocks
|
||||
}
|
||||
|
||||
const u64 linear_size{GetSizeInBytes(framebuffer)};
|
||||
const u64 tiled_size{Tegra::Texture::CalculateSize(
|
||||
true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0)};
|
||||
|
||||
if (host_ptr && tiled_size > 0 && linear_size > 0) {
|
||||
// Validate texture data before unswizzling to prevent corruption
|
||||
const u64 max_size = static_cast<u64>(framebuffer.stride) * framebuffer.height * 4; // Max possible size
|
||||
if (tiled_size <= max_size && linear_size <= max_size) {
|
||||
Tegra::Texture::UnswizzleTexture(
|
||||
mapped_span.subspan(image_offset, linear_size), std::span(host_ptr, tiled_size),
|
||||
bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
|
||||
} else {
|
||||
// Fallback: copy raw data without unswizzling if sizes are invalid
|
||||
const u64 copy_size = std::min(linear_size, static_cast<u64>(mapped_span.size() - image_offset));
|
||||
if (copy_size > 0) {
|
||||
std::memcpy(mapped_span.data() + image_offset, host_ptr, copy_size);
|
||||
}
|
||||
}
|
||||
if (host_ptr) {
|
||||
Tegra::Texture::UnswizzleTexture(
|
||||
mapped_span.subspan(image_offset, linear_size), std::span(host_ptr, tiled_size),
|
||||
bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
|
||||
buffer.Flush(); // Ensure host writes are visible before the GPU copy.
|
||||
}
|
||||
|
||||
// Validate framebuffer dimensions to prevent corruption
|
||||
const u32 max_dimension = 8192; // Reasonable maximum for Switch games
|
||||
const u32 safe_width = std::min(framebuffer.width, max_dimension);
|
||||
const u32 safe_height = std::min(framebuffer.height, max_dimension);
|
||||
|
||||
const VkBufferImageCopy copy{
|
||||
.bufferOffset = image_offset,
|
||||
.bufferRowLength = 0,
|
||||
@@ -335,22 +311,20 @@ void Layer::UpdateRawImage(const Tegra::FramebufferConfig& framebuffer, size_t i
|
||||
.imageOffset = {.x = 0, .y = 0, .z = 0},
|
||||
.imageExtent =
|
||||
{
|
||||
.width = safe_width,
|
||||
.height = safe_height,
|
||||
.width = framebuffer.width,
|
||||
.height = framebuffer.height,
|
||||
.depth = 1,
|
||||
},
|
||||
};
|
||||
scheduler.Record([this, copy, index = image_index](vk::CommandBuffer cmdbuf) {
|
||||
const VkImage image = *raw_images[index];
|
||||
|
||||
// Enhanced memory barriers to prevent texture corruption and flickering
|
||||
const VkImageMemoryBarrier base_barrier{
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = 0,
|
||||
.dstAccessMask = 0,
|
||||
.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
|
||||
.newLayout = VK_IMAGE_LAYOUT_UNDEFINED,
|
||||
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.image = image,
|
||||
@@ -362,34 +336,24 @@ void Layer::UpdateRawImage(const Tegra::FramebufferConfig& framebuffer, size_t i
|
||||
.layerCount = 1,
|
||||
},
|
||||
};
|
||||
|
||||
// Transition to transfer destination
|
||||
VkImageMemoryBarrier read_barrier = base_barrier;
|
||||
read_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT;
|
||||
read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
|
||||
read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
|
||||
read_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
|
||||
|
||||
// Transition to shader read
|
||||
VkImageMemoryBarrier write_barrier = base_barrier;
|
||||
write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
|
||||
write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
|
||||
write_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
|
||||
write_barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
|
||||
|
||||
// Ensure all previous operations complete before transfer
|
||||
cmdbuf.PipelineBarrier(
|
||||
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
0, {}, {}, {read_barrier});
|
||||
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
|
||||
read_barrier);
|
||||
cmdbuf.CopyBufferToImage(*buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy);
|
||||
|
||||
// Ensure transfer completes before shader access
|
||||
cmdbuf.PipelineBarrier(
|
||||
VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||
0, {}, {}, {write_barrier});
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
|
||||
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||
0, write_barrier);
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -878,6 +878,11 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, Scheduler& sched
|
||||
if (IsPixelFormatASTC(image_format) && !device.IsOptimalAstcSupported()) {
|
||||
view_formats[index_a].push_back(VK_FORMAT_A8B8G8R8_UNORM_PACK32);
|
||||
}
|
||||
if (IsPixelFormatETC2(image_format) && !device.IsOptimalEtc2Supported()) {
|
||||
const bool is_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_format);
|
||||
view_formats[index_a].push_back(is_srgb ? VK_FORMAT_A8B8G8R8_SRGB_PACK32
|
||||
: VK_FORMAT_A8B8G8R8_UNORM_PACK32);
|
||||
}
|
||||
for (size_t index_b = 0; index_b < VideoCore::Surface::MaxPixelFormat; index_b++) {
|
||||
const auto view_format = static_cast<PixelFormat>(index_b);
|
||||
if (VideoCore::Surface::IsViewCompatible(image_format, view_format, false, true)) {
|
||||
@@ -1488,6 +1493,10 @@ Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu
|
||||
flags |= VideoCommon::ImageFlagBits::Converted;
|
||||
flags |= VideoCommon::ImageFlagBits::CostlyLoad;
|
||||
}
|
||||
if (IsPixelFormatETC2(info.format) && !runtime->device.IsOptimalEtc2Supported()) {
|
||||
flags |= VideoCommon::ImageFlagBits::Converted;
|
||||
flags |= VideoCommon::ImageFlagBits::CostlyLoad;
|
||||
}
|
||||
if (runtime->device.HasDebuggingToolAttached()) {
|
||||
original_image.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user