fix(Vulkan) Work around a Vulkan driver OOM on Mali devices (#11271) 37439d5fb6

The Mali Vulkan driver/device was running out of memory internally, occasionally. This adds a hook into the testing window that gets run after each GM finishes, which TestingWindowAndroidVulkan now uses to tear the device down completely. The device then gets rebuilt as needed.

Co-authored-by: Josh Jersild <joshua@rive.app>
This commit is contained in:
JoshJRive
2025-12-13 03:43:05 +00:00
parent 8d869457e9
commit 6134855b7f
6 changed files with 246 additions and 163 deletions

View File

@@ -1 +1 @@
1dd286c979e2168ac0f899ca55b562ace933c3e3
37439d5fb6388d0315fdea9b9772f0213ef1dfbc

View File

@@ -21,6 +21,7 @@ public:
bool coreFeaturesOnly = false;
const char* gpuNameFilter = nullptr;
bool headless = false;
bool printInitializationMessage = true;
uint32_t minimumSupportedAPIVersion = VK_API_VERSION_1_0;
@@ -29,6 +30,13 @@ public:
VkSurfaceKHR presentationSurfaceForDeviceSelection = VK_NULL_HANDLE;
};
struct DriverVersion
{
uint32_t major;
uint32_t minor;
uint32_t patch;
};
VulkanDevice(VulkanInstance&, const Options&);
~VulkanDevice();
@@ -66,6 +74,8 @@ public:
const std::string& name() const { return m_name; }
const DriverVersion& driverVersion() const { return m_driverVersion; }
private:
struct FindDeviceResult
{
@@ -73,9 +83,7 @@ private:
std::string deviceName;
VkPhysicalDeviceType deviceType;
uint32_t deviceAPIVersion;
uint32_t driverVersionMajor;
uint32_t driverVersionMinor;
uint32_t driverVersionPatch;
DriverVersion driverVersion;
};
static FindDeviceResult findCompatiblePhysicalDevice(
@@ -103,6 +111,7 @@ private:
std::vector<VkQueueFamilyProperties> m_queueFamilyProperties;
std::string m_name;
DriverVersion m_driverVersion;
VkPhysicalDevice m_physicalDevice;
VkDevice m_device;

View File

@@ -32,33 +32,37 @@ static const char* physicalDeviceTypeName(VkPhysicalDeviceType type)
}
}
void unpackDriverVersion(const VkPhysicalDeviceProperties& props,
uint32_t* majorOut,
uint32_t* minorOut,
uint32_t* patchOut)
VulkanDevice::DriverVersion unpackDriverVersion(
const VkPhysicalDeviceProperties& props)
{
if (props.vendorID == VULKAN_VENDOR_NVIDIA)
{
// NVidia uses 10|8|8|6 encoding for driver version. We'll ignore the
// fourth version section.
*majorOut = props.driverVersion >> 22;
*minorOut = (props.driverVersion >> 14) & 0xff;
*patchOut = (props.driverVersion >> 6) & 0xff;
return {
.major = props.driverVersion >> 22,
.minor = (props.driverVersion >> 14) & 0xff,
.patch = (props.driverVersion >> 6) & 0xff,
};
}
#ifdef _WIN32
else if (props.vendorID == VULKAN_VENDOR_INTEL)
{
*majorOut = props.driverVersion >> 14;
*minorOut = props.driverVersion & 0x3fff;
*patchOut = 0;
return {
.major = props.driverVersion >> 14,
.minor = props.driverVersion & 0x3fff,
.patch = 0,
};
}
#endif
else
{
// Everything else seems to use the standard Vulkan encoding.
*majorOut = VK_API_VERSION_MAJOR(props.driverVersion);
*minorOut = VK_API_VERSION_MINOR(props.driverVersion);
*patchOut = VK_API_VERSION_PATCH(props.driverVersion);
return {
.major = VK_API_VERSION_MAJOR(props.driverVersion),
.minor = VK_API_VERSION_MINOR(props.driverVersion),
.patch = VK_API_VERSION_PATCH(props.driverVersion),
};
}
}
@@ -83,6 +87,7 @@ VulkanDevice::VulkanDevice(VulkanInstance& instance, const Options& opts)
opts.minimumSupportedAPIVersion);
m_physicalDevice = findResult.physicalDevice;
m_name = findResult.deviceName;
m_driverVersion = findResult.driverVersion;
DEFINE_AND_LOAD_INSTANCE_FUNC(vkGetPhysicalDeviceFeatures, instance);
assert(vkGetPhysicalDeviceFeatures != nullptr);
@@ -252,35 +257,39 @@ VulkanDevice::VulkanDevice(VulkanInstance& instance, const Options& opts)
LOAD_MEMBER_INSTANCE_FUNC(vkGetPhysicalDeviceSurfaceCapabilitiesKHR,
instance);
printf("==== Vulkan %i.%i.%i GPU (%s): %s (driver %i.%i.%i) [ ",
VK_API_VERSION_MAJOR(m_riveVulkanFeatures.apiVersion),
VK_API_VERSION_MINOR(m_riveVulkanFeatures.apiVersion),
VK_API_VERSION_PATCH(m_riveVulkanFeatures.apiVersion),
physicalDeviceTypeName(findResult.deviceType),
findResult.deviceName.c_str(),
findResult.driverVersionMajor,
findResult.driverVersionMinor,
findResult.driverVersionPatch);
struct CommaSeparator
if (opts.printInitializationMessage)
{
const char* m_separator = "";
const char* operator*() { return std::exchange(m_separator, ", "); }
} commaSeparator;
if (m_riveVulkanFeatures.independentBlend)
printf("%sindependentBlend", *commaSeparator);
if (m_riveVulkanFeatures.fillModeNonSolid)
printf("%sfillModeNonSolid", *commaSeparator);
if (m_riveVulkanFeatures.fragmentStoresAndAtomics)
printf("%sfragmentStoresAndAtomics", *commaSeparator);
if (m_riveVulkanFeatures.shaderClipDistance)
printf("%sshaderClipDistance", *commaSeparator);
if (m_riveVulkanFeatures.rasterizationOrderColorAttachmentAccess)
printf("%srasterizationOrderColorAttachmentAccess", *commaSeparator);
if (m_riveVulkanFeatures.fragmentShaderPixelInterlock)
printf("%sfragmentShaderPixelInterlock", *commaSeparator);
if (m_riveVulkanFeatures.VK_KHR_portability_subset)
printf("%sVK_KHR_portability_subset", *commaSeparator);
printf(" ] ====\n");
printf("==== Vulkan %i.%i.%i GPU (%s): %s (driver %i.%i.%i) [ ",
VK_API_VERSION_MAJOR(m_riveVulkanFeatures.apiVersion),
VK_API_VERSION_MINOR(m_riveVulkanFeatures.apiVersion),
VK_API_VERSION_PATCH(m_riveVulkanFeatures.apiVersion),
physicalDeviceTypeName(findResult.deviceType),
findResult.deviceName.c_str(),
findResult.driverVersion.major,
findResult.driverVersion.minor,
findResult.driverVersion.patch);
struct CommaSeparator
{
const char* m_separator = "";
const char* operator*() { return std::exchange(m_separator, ", "); }
} commaSeparator;
if (m_riveVulkanFeatures.independentBlend)
printf("%sindependentBlend", *commaSeparator);
if (m_riveVulkanFeatures.fillModeNonSolid)
printf("%sfillModeNonSolid", *commaSeparator);
if (m_riveVulkanFeatures.fragmentStoresAndAtomics)
printf("%sfragmentStoresAndAtomics", *commaSeparator);
if (m_riveVulkanFeatures.shaderClipDistance)
printf("%sshaderClipDistance", *commaSeparator);
if (m_riveVulkanFeatures.rasterizationOrderColorAttachmentAccess)
printf("%srasterizationOrderColorAttachmentAccess",
*commaSeparator);
if (m_riveVulkanFeatures.fragmentShaderPixelInterlock)
printf("%sfragmentShaderPixelInterlock", *commaSeparator);
if (m_riveVulkanFeatures.VK_KHR_portability_subset)
printf("%sVK_KHR_portability_subset", *commaSeparator);
printf(" ] ====\n");
}
}
VulkanDevice::~VulkanDevice() { m_vkDestroyDevice(m_device, nullptr); }
@@ -417,16 +426,12 @@ VulkanDevice::FindDeviceResult VulkanDevice::findCompatiblePhysicalDevice(
if (strstr(props.deviceName, nameFilter) != nullptr)
{
uint32_t major, minor, patch;
unpackDriverVersion(props, &major, &minor, &patch);
matchResult = {
.physicalDevice = device,
.deviceName = props.deviceName,
.deviceType = props.deviceType,
.deviceAPIVersion = props.apiVersion,
.driverVersionMajor = major,
.driverVersionMinor = minor,
.driverVersionPatch = patch,
.driverVersion = unpackDriverVersion(props),
};
matchedDeviceNames.push_back(std::string{props.deviceName});
}
@@ -484,17 +489,12 @@ VulkanDevice::FindDeviceResult VulkanDevice::findCompatiblePhysicalDevice(
if (!onlyAcceptDesiredType ||
props.deviceType == desiredDeviceType)
{
uint32_t major, minor, patch;
unpackDriverVersion(props, &major, &minor, &patch);
return {
.physicalDevice = device,
.deviceName = props.deviceName,
.deviceType = props.deviceType,
.deviceAPIVersion = props.apiVersion,
.driverVersionMajor = major,
.driverVersionMinor = minor,
.driverVersionPatch = patch,
.driverVersion = unpackDriverVersion(props),
};
}
}

View File

@@ -170,6 +170,9 @@ public:
m_height = height;
}
// This is called by the gm testing after each GM runs
virtual void onceAfterGM() {}
struct FrameOptions
{
uint32_t clearColor;

View File

@@ -25,6 +25,7 @@ TestingWindow* TestingWindow::MakeAndroidVulkan(const BackendParams&,
#include <vk_mem_alloc.h>
#include <android/native_app_glue/android_native_app_glue.h>
#include <android/log.h>
#include <thread>
using namespace rive;
using namespace rive::gpu;
@@ -45,7 +46,7 @@ class TestingWindowAndroidVulkan : public TestingWindow
public:
TestingWindowAndroidVulkan(const BackendParams& backendParams,
ANativeWindow* window) :
m_backendParams(backendParams)
m_backendParams(backendParams), m_window(window)
{
using namespace rive_vkb;
@@ -80,7 +81,7 @@ public:
VkAndroidSurfaceCreateInfoKHR androidSurfaceCreateInfo = {
.sType = VK_STRUCTURE_TYPE_ANDROID_SURFACE_CREATE_INFO_KHR,
.window = window,
.window = m_window,
};
auto pfnvkCreateAndroidSurfaceKHR =
m_instance->loadInstanceFunc<PFN_vkCreateAndroidSurfaceKHR>(
@@ -90,99 +91,11 @@ public:
&androidSurfaceCreateInfo,
nullptr,
&m_windowSurface));
m_device = std::make_unique<VulkanDevice>(
*m_instance,
VulkanDevice::Options{
.coreFeaturesOnly = m_backendParams.core,
});
m_renderContext = RenderContextVulkanImpl::MakeContext(
m_instance->vkInstance(),
m_device->vkPhysicalDevice(),
m_device->vkDevice(),
m_device->vulkanFeatures(),
m_instance->getVkGetInstanceProcAddrPtr(),
{.forceAtomicMode = backendParams.atomic});
auto windowCapabilities =
m_device->getSurfaceCapabilities(m_windowSurface);
auto swapOpts = VulkanSwapchain::Options{
.formatPreferences =
{
{
.format = m_backendParams.srgb
? VK_FORMAT_R8G8B8A8_SRGB
: VK_FORMAT_R8G8B8A8_UNORM,
.colorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR,
},
// Fall back to either ordering of ARGB
{
.format = VK_FORMAT_R8G8B8A8_UNORM,
.colorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR,
},
{
.format = VK_FORMAT_B8G8R8A8_UNORM,
.colorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR,
},
},
.presentModePreferences =
{
VK_PRESENT_MODE_IMMEDIATE_KHR,
VK_PRESENT_MODE_FIFO_KHR,
},
.imageUsageFlags = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
VK_IMAGE_USAGE_TRANSFER_DST_BIT,
};
if ((windowCapabilities.supportedUsageFlags &
VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) != 0)
{
swapOpts.imageUsageFlags |= VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT;
}
m_swapchain =
std::make_unique<rive_vkb::VulkanSwapchain>(*m_instance,
*m_device,
ref_rcp(vk()),
m_windowSurface,
swapOpts);
m_androidWindowWidth = m_width = m_swapchain->width();
m_androidWindowHeight = m_height = m_swapchain->height();
m_renderTarget =
impl()->makeRenderTarget(m_width,
m_height,
m_swapchain->imageFormat(),
m_swapchain->imageUsageFlags());
if (m_device->name() == "Mali-G76" || m_device->name() == "Mali-G72")
{
// These devices (like the Huawei P30 or Galaxy S10) will end up
// with a DEVICE_LOST error if we blit to the screen, so don't.
m_allowBlitOffscreenToScreen = false;
}
if (m_device->name() == "PowerVR Rogue GM9446")
{
// These devices (like the Oppo Reno 3 Pro) only give correct
// results when rendering to an off-screen texture.
m_alwaysUseOffscreenTexture = true;
}
}
~TestingWindowAndroidVulkan()
{
// Destroy the swapchain first because it synchronizes for in-flight
// command buffers.
m_swapchain = nullptr;
m_renderContext.reset();
m_renderTarget.reset();
m_overflowTexture.reset();
destroyRenderContext();
if (m_windowSurface != VK_NULL_HANDLE)
{
@@ -192,21 +105,40 @@ public:
}
}
Factory* factory() override { return m_renderContext.get(); }
Factory* factory() override
{
// Some GMs call factory() during construction (before the call to
// resize will rebuild the device), so this function also needs to build
// the render context
if (m_renderContext == nullptr)
{
makeDeviceAndRenderContext();
}
return m_renderContext.get();
}
rive::gpu::RenderContext* renderContext() const override
{
assert(m_renderContext != nullptr);
return m_renderContext.get();
}
rive::gpu::RenderTarget* renderTarget() const override
{
assert(m_renderTarget != nullptr);
return m_renderTarget.get();
}
void resize(int width, int height) override
{
TestingWindow::resize(width, height);
if (m_renderContext == nullptr)
{
makeDeviceAndRenderContext();
}
m_renderTarget =
impl()->makeRenderTarget(m_width,
m_height,
@@ -252,6 +184,11 @@ public:
std::unique_ptr<rive::Renderer> beginFrame(
const FrameOptions& options) override
{
if (m_renderContext == nullptr)
{
makeDeviceAndRenderContext();
}
m_renderContext->beginFrame(RenderContext::FrameDescriptor{
.renderTargetWidth = m_width,
.renderTargetHeight = m_height,
@@ -381,7 +318,129 @@ public:
}
}
void onceAfterGM() override
{
// Mali devices with a driver version before 50 have been known to run
// out of memory, so for these devices, tear down the device between
// GMs.
if (m_device != nullptr &&
strstr(m_device->name().c_str(), "Mali") != nullptr &&
m_device->driverVersion().major < 50)
{
destroyRenderContext();
}
}
private:
void makeDeviceAndRenderContext()
{
using namespace rive_vkb;
m_device = std::make_unique<VulkanDevice>(
*m_instance,
VulkanDevice::Options{
.coreFeaturesOnly = m_backendParams.core,
.printInitializationMessage =
m_printDeviceInitializationMessage,
});
// Only want to print the device initialization message the first time
m_printDeviceInitializationMessage = false;
m_renderContext = RenderContextVulkanImpl::MakeContext(
m_instance->vkInstance(),
m_device->vkPhysicalDevice(),
m_device->vkDevice(),
m_device->vulkanFeatures(),
m_instance->getVkGetInstanceProcAddrPtr(),
{.forceAtomicMode = m_backendParams.atomic});
auto windowCapabilities =
m_device->getSurfaceCapabilities(m_windowSurface);
auto swapOpts = VulkanSwapchain::Options{
.formatPreferences =
{
{
.format = m_backendParams.srgb
? VK_FORMAT_R8G8B8A8_SRGB
: VK_FORMAT_R8G8B8A8_UNORM,
.colorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR,
},
// Fall back to either ordering of ARGB
{
.format = VK_FORMAT_R8G8B8A8_UNORM,
.colorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR,
},
{
.format = VK_FORMAT_B8G8R8A8_UNORM,
.colorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR,
},
},
.presentModePreferences =
{
VK_PRESENT_MODE_IMMEDIATE_KHR,
VK_PRESENT_MODE_FIFO_KHR,
},
.imageUsageFlags = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
VK_IMAGE_USAGE_TRANSFER_DST_BIT,
};
if ((windowCapabilities.supportedUsageFlags &
VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) != 0)
{
swapOpts.imageUsageFlags |= VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT;
}
m_swapchain =
std::make_unique<rive_vkb::VulkanSwapchain>(*m_instance,
*m_device,
ref_rcp(vk()),
m_windowSurface,
swapOpts);
m_androidWindowWidth = m_swapchain->width();
m_androidWindowHeight = m_swapchain->height();
if (m_width == 0)
{
m_width = m_androidWindowWidth;
}
if (m_height == 0)
{
m_height = m_androidWindowHeight;
}
if (m_device->name() == "Mali-G76" || m_device->name() == "Mali-G72")
{
// These devices (like the Huawei P30 or Galaxy S10) will end up
// with a DEVICE_LOST error if we blit to the screen, so don't.
m_allowBlitOffscreenToScreen = false;
}
if (m_device->name() == "PowerVR Rogue GM9446")
{
// These devices (like the Oppo Reno 3 Pro) only give correct
// results when rendering to an off-screen texture.
m_alwaysUseOffscreenTexture = true;
}
}
void destroyRenderContext()
{
if (m_device != nullptr)
{
m_device->waitUntilIdle();
m_swapchain = nullptr;
m_renderTarget = nullptr;
m_overflowTexture = nullptr;
m_renderContext = nullptr;
m_device = nullptr;
}
}
RenderContextVulkanImpl* impl() const
{
return m_renderContext->static_impl_cast<RenderContextVulkanImpl>();
@@ -389,6 +448,7 @@ private:
VulkanContext* vk() const { return impl()->vulkanContext(); }
ANativeWindow* m_window = nullptr;
BackendParams m_backendParams;
uint32_t m_androidWindowWidth;
uint32_t m_androidWindowHeight;
@@ -402,6 +462,7 @@ private:
// size doesn't fit in the window.
PFN_vkDestroySurfaceKHR m_vkDestroySurfaceKHR = nullptr;
bool m_printDeviceInitializationMessage = true;
bool m_allowBlitOffscreenToScreen = true;
bool m_alwaysUseOffscreenTexture = false;
};

View File

@@ -205,23 +205,33 @@ static void dumpGMs(const std::string& match, bool interactive)
for (const auto& [make_gm, name] : gmRegistry)
{
std::unique_ptr<GM> gm(make_gm());
// Scope the GM so that it destructs (and releases its resources) before
// we call `onceAfterGM` which potentially tears down the entire display
// devices (see: TestingWindowAndroidVulkan)
{
std::unique_ptr<GM> gm(make_gm());
if (!gm)
{
continue;
}
if (match.size() && !contains(name, match))
{
continue; // This gm got filtered out by the '--match' argument.
}
if (!TestHarness::Instance().claimGMTest(name))
{
continue; // A different process already drew this gm.
}
gm->onceBeforeDraw();
if (!gm)
{
continue;
}
if (match.size() && !contains(name, match))
{
continue; // This gm got filtered out by the '--match' argument.
}
if (!TestHarness::Instance().claimGMTest(name))
{
continue; // A different process already drew this gm.
}
gm->onceBeforeDraw();
dump_gm(gm.get(), name);
}
// Allow the testing window to do any cleanup it might want to do
// between GMs
TestingWindow::Get()->onceAfterGM();
dump_gm(gm.get(), name);
if (interactive)
{
// Wait for any key if in interactive mode.