fix(gl): Get running on Adreno 308 (#11467) 28e95e51e7

Adreno 308 had a few issues:

  * Crash from drawing too many instances, which we work around by
    breaking them up with glFlush.

  * Compiler failure from the compiler declaring a 3.1 builtin in ESSL
    3.0, which we work around with a #define.

  * The advertised max texture size is 8192, but textures larger than
    2048 seem to not work with EXT_multisampled_render_to_texture.
    Either way, we shouldn't have any gms larger than 2048 since that's
    the bare minimum per the spec. Shrink the larger gms down to 2048.

Rive had an issue as well:

  * With EXT_multisampled_render_to_texture but not
    KHR_blend_equation_advanced, we were trying to use the same texture
    for both msaa and the dstRead. Separate these into their own
    textures.

Co-authored-by: Chris Dalton <99840794+csmartdalton@users.noreply.github.com>
This commit is contained in:
csmartdalton
2026-01-16 06:19:00 +00:00
parent 155a726a9b
commit 9617565e7c
8 changed files with 131 additions and 34 deletions

View File

@@ -1 +1 @@
f57124001d4ee388c32a3ec0cd08e292fafc268d
28e95e51e7823ed149fa5fc323ccc9751bca5356

View File

@@ -161,11 +161,12 @@ struct GLCapabilities
uint32_t contextVersionMinor;
uint32_t vendorDriverVersionMajor;
uint32_t vendorDriverVersionMinor;
uint32_t adrenoSeries;
// Workarounds.
// Some Mali and PowerVR devices crash when issuing draw commands with a
// large instancecount.
uint32_t maxSupportedInstancesPerDrawCommand;
// Many devices crash on draw commands with a large instancecount, or when
// drawing many instances without a glFlush to break them up.
uint32_t maxSupportedInstancesPerFlush;
// Chrome 136 crashes when trying to run Rive because it attempts to enable
// blending on the tessellation texture, which is invalid for an integer
// render target. The workaround is to use a floating-point tessellation

View File

@@ -221,6 +221,33 @@ private:
void flush(const FlushDescriptor&) override;
// We have observed Adreno 308 crash when drawing too many instances spread
// across any number of draw calls. This class breaks them up with glFlush
// in order to fix the crashes.
//
// It keeps a running total of the instances issued since the last glFlush it
// injected (or since construction) and injects a new glFlush whenever the
// next draw would push that total past
// GLCapabilities::maxSupportedInstancesPerFlush.
class GLFlushInjector
{
public:
    // Captures the per-flush instance budget from "capabilities". Explicit so
    // a GLCapabilities is never implicitly converted into an injector.
    explicit GLFlushInjector(const GLCapabilities& capabilities) :
        m_maxSupportedInstancesPerFlush(
            capabilities.maxSupportedInstancesPerFlush)
    {}

    // Call immediately before issuing an instanced draw of
    // "nextInstanceCount" instances. Issues glFlush() first if adding this
    // draw to the running total would exceed the per-flush budget, then
    // records the draw's instances against the (possibly reset) total.
    void flushBeforeInstancedDrawIfNeeded(uint32_t nextInstanceCount)
    {
        // Same condition as "current + next > max", but arranged so the
        // uint32_t arithmetic can never wrap around and hide an overrun.
        const bool wouldExceedBudget =
            nextInstanceCount > m_maxSupportedInstancesPerFlush ||
            m_currentFlushInstanceCount >
                m_maxSupportedInstancesPerFlush - nextInstanceCount;
        if (wouldExceedBudget)
        {
            glFlush();
            m_currentFlushInstanceCount = 0;
        }
        // Cannot overflow: either the total was just reset to 0, or the sum
        // was verified above to be <= m_maxSupportedInstancesPerFlush.
        m_currentFlushInstanceCount += nextInstanceCount;
    }

private:
    // Maximum number of instances to issue between two glFlush calls.
    const uint32_t m_maxSupportedInstancesPerFlush;
    // Instances recorded since the last injected glFlush (or construction).
    uint32_t m_currentFlushInstanceCount = 0;
};
// Issues the equivalent of glDrawElementsInstancedBaseInstanceEXT(),
// assuming no vertex attribs are instanced, indices are uint16_t, and
// applying workarounds for known driver bugs.
@@ -234,7 +261,8 @@ private:
uint32_t baseIndex,
uint32_t instanceCount,
uint32_t baseInstance,
GLint baseInstanceUniformLocation);
GLint baseInstanceUniformLocation,
GLFlushInjector*);
GLCapabilities m_capabilities;

View File

@@ -75,10 +75,20 @@ public:
virtual void allocateWebGLPLSBacking(const GLCapabilities&) = 0;
#endif
// Returns a texture that may be used in the absence of
// KHR_blend_equation_advanced to make a copy of the render target for
// blending.
GLuint dstColorTexture();
void bindDstColorFramebuffer(GLenum target);
protected:
RenderTargetGL(uint32_t width, uint32_t height) :
RenderTarget(width, height)
{}
private:
glutils::Texture m_dstColorTexture = glutils::Texture::Zero();
glutils::Framebuffer m_dstColorFramebuffer = glutils::Framebuffer::Zero();
};
// GL render target that draws to an external texture provided by the client.

View File

@@ -1710,6 +1710,8 @@ void RenderContextGLImpl::flush(const FlushDescriptor& desc)
desc.firstContour * sizeof(gpu::ContourData));
}
GLFlushInjector flushInjector(m_capabilities);
// Render the complex color ramps into the gradient texture.
if (desc.gradSpanCount > 0)
{
@@ -1739,10 +1741,10 @@ void RenderContextGLImpl::flush(const FlushDescriptor& desc)
m_state->bindVAO(m_colorRampVAO);
GLenum colorAttachment0 = GL_COLOR_ATTACHMENT0;
glInvalidateFramebuffer(GL_FRAMEBUFFER, 1, &colorAttachment0);
for (auto [instanceCount, baseInstance] : InstanceChunker(
for (auto [chunkInstanceCount, chunkBaseInstance] : InstanceChunker(
desc.gradSpanCount,
math::lossless_numeric_cast<uint32_t>(desc.firstGradSpan),
m_capabilities.maxSupportedInstancesPerDrawCommand))
m_capabilities.maxSupportedInstancesPerFlush))
{
glVertexAttribIPointer(
@@ -1750,12 +1752,13 @@ void RenderContextGLImpl::flush(const FlushDescriptor& desc)
4,
GL_UNSIGNED_INT,
0,
reinterpret_cast<const void*>(baseInstance *
reinterpret_cast<const void*>(chunkBaseInstance *
sizeof(gpu::GradientSpan)));
flushInjector.flushBeforeInstancedDrawIfNeeded(chunkInstanceCount);
glDrawArraysInstanced(GL_TRIANGLE_STRIP,
0,
gpu::GRAD_SPAN_TRI_STRIP_VERTEX_COUNT,
instanceCount);
chunkInstanceCount);
}
}
@@ -1771,15 +1774,15 @@ void RenderContextGLImpl::flush(const FlushDescriptor& desc)
m_state->bindVAO(m_tessellateVAO);
GLenum colorAttachment0 = GL_COLOR_ATTACHMENT0;
glInvalidateFramebuffer(GL_FRAMEBUFFER, 1, &colorAttachment0);
for (auto [instanceCount, baseInstance] : InstanceChunker(
desc.tessVertexSpanCount,
math::lossless_numeric_cast<uint32_t>(
desc.firstTessVertexSpan),
m_capabilities.maxSupportedInstancesPerDrawCommand))
for (auto [chunkInstanceCount, chunkBaseInstance] :
InstanceChunker(desc.tessVertexSpanCount,
math::lossless_numeric_cast<uint32_t>(
desc.firstTessVertexSpan),
m_capabilities.maxSupportedInstancesPerFlush))
{
size_t tessSpanOffsetInBytes =
baseInstance * sizeof(gpu::TessVertexSpan);
chunkBaseInstance * sizeof(gpu::TessVertexSpan);
for (GLuint i = 0; i < 3; ++i)
{
glVertexAttribPointer(i,
@@ -1797,11 +1800,12 @@ void RenderContextGLImpl::flush(const FlushDescriptor& desc)
sizeof(TessVertexSpan),
reinterpret_cast<const void*>(tessSpanOffsetInBytes +
offsetof(TessVertexSpan, x0x1)));
flushInjector.flushBeforeInstancedDrawIfNeeded(chunkInstanceCount);
glDrawElementsInstanced(GL_TRIANGLES,
std::size(gpu::kTessSpanIndices),
GL_UNSIGNED_SHORT,
0,
instanceCount);
chunkInstanceCount);
}
}
@@ -1897,7 +1901,8 @@ void RenderContextGLImpl::flush(const FlushDescriptor& desc)
gpu::kMidpointFanCenterAAPatchBaseIndex,
fillBatch.patchCount,
fillBatch.basePatch,
m_atlasFillProgram.baseInstanceUniformLocation());
m_atlasFillProgram.baseInstanceUniformLocation(),
&flushInjector);
}
}
@@ -1920,7 +1925,8 @@ void RenderContextGLImpl::flush(const FlushDescriptor& desc)
gpu::kMidpointFanPatchBaseIndex,
strokeBatch.patchCount,
strokeBatch.basePatch,
m_atlasFillProgram.baseInstanceUniformLocation());
m_atlasFillProgram.baseInstanceUniformLocation(),
&flushInjector);
}
}
@@ -2044,14 +2050,14 @@ void RenderContextGLImpl::flush(const FlushDescriptor& desc)
}
else
{
// Bind the renderTexture where it can be read for in-shader
// Bind the dstColorTexture where it can be read for in-shader
// blending. We will resolve MSAA into this texture before
// issuing draws that use advanced blend.
// NOTE: The renderTexture() function may lazily allocate the
// NOTE: The dstColorTexture() function may lazily allocate the
// texture, so don't call glActiveTexture() until it returns.
GLuint renderTexture = renderTarget->renderTexture();
GLuint dstColorTexture = renderTarget->dstColorTexture();
glActiveTexture(GL_TEXTURE0 + DST_COLOR_TEXTURE_IDX);
glBindTexture(GL_TEXTURE_2D, renderTexture);
glBindTexture(GL_TEXTURE_2D, dstColorTexture);
}
}
}
@@ -2150,7 +2156,7 @@ void RenderContextGLImpl::flush(const FlushDescriptor& desc)
// blending.
assert(desc.interlockMode == gpu::InterlockMode::msaa);
assert(batch.dstReadList != nullptr);
renderTarget->bindTextureFramebuffer(GL_DRAW_FRAMEBUFFER);
renderTarget->bindDstColorFramebuffer(GL_DRAW_FRAMEBUFFER);
for (const Draw* draw = batch.dstReadList; draw != nullptr;
draw = draw->nextDstRead())
{
@@ -2188,7 +2194,8 @@ void RenderContextGLImpl::flush(const FlushDescriptor& desc)
gpu::PatchBaseIndex(drawType),
batch.elementCount,
batch.baseElement,
drawProgram->baseInstanceUniformLocation());
drawProgram->baseInstanceUniformLocation(),
&flushInjector);
break;
}
@@ -2373,7 +2380,8 @@ void RenderContextGLImpl::drawIndexedInstancedNoInstancedAttribs(
uint32_t baseIndex,
uint32_t instanceCount,
uint32_t baseInstance,
GLint baseInstanceUniformLocation)
GLint baseInstanceUniformLocation,
GLFlushInjector* flushInjector)
{
assert(m_capabilities.ANGLE_base_vertex_base_instance_shader_builtin ==
(baseInstanceUniformLocation < 0));
@@ -2382,8 +2390,9 @@ void RenderContextGLImpl::drawIndexedInstancedNoInstancedAttribs(
for (auto [chunkInstanceCount, chunkBaseInstance] :
InstanceChunker(instanceCount,
baseInstance,
m_capabilities.maxSupportedInstancesPerDrawCommand))
m_capabilities.maxSupportedInstancesPerFlush))
{
flushInjector->flushBeforeInstancedDrawIfNeeded(chunkInstanceCount);
#ifndef RIVE_WEBGL
if (m_capabilities.ANGLE_base_vertex_base_instance_shader_builtin)
{
@@ -2521,6 +2530,8 @@ std::unique_ptr<RenderContext> RenderContextGLImpl::MakeContext(
"%u.%u",
&capabilities.contextVersionMajor,
&capabilities.contextVersionMinor);
capabilities.vendorDriverVersionMajor = 0;
capabilities.vendorDriverVersionMinor = 0;
}
else if (capabilities.isPowerVR)
{
@@ -2537,6 +2548,8 @@ std::unique_ptr<RenderContext> RenderContextGLImpl::MakeContext(
"OpenGL ES %u.%u",
&capabilities.contextVersionMajor,
&capabilities.contextVersionMinor);
capabilities.vendorDriverVersionMajor = 0;
capabilities.vendorDriverVersionMinor = 0;
}
#ifdef RIVE_DESKTOP_GL
assert(capabilities.contextVersionMajor == GLAD_GL_version_major);
@@ -2544,6 +2557,12 @@ std::unique_ptr<RenderContext> RenderContextGLImpl::MakeContext(
assert(capabilities.isGLES == static_cast<bool>(GLAD_GL_version_es));
#endif
// Parse the Adreno model number (e.g., 308) out of the renderer string.
// adrenoSeries is a uint32_t, so it must be scanned with %u. We also require
// exactly one successful conversion: sscanf returns EOF (a nonzero value) on
// input failure, so testing "!sscanf(...)" would skip the fallback below and
// leave adrenoSeries unset. Non-Adreno or unparseable renderers report 0.
if (!capabilities.isAdreno ||
    sscanf(rendererString, "Adreno (TM) %u", &capabilities.adrenoSeries) != 1)
{
    capabilities.adrenoSeries = 0;
}
if (capabilities.isGLES)
{
if (!capabilities.isContextVersionAtLeast(3, 0))
@@ -2569,7 +2588,8 @@ std::unique_ptr<RenderContext> RenderContextGLImpl::MakeContext(
}
}
if (capabilities.isMali || capabilities.isPowerVR)
if (capabilities.isMali || capabilities.isPowerVR ||
(capabilities.isAdreno && capabilities.adrenoSeries < 600))
{
// We have observed crashes on Mali-G71 when issuing instanced draws
// with somewhere between 2^15 and 2^16 instances.
@@ -2577,13 +2597,17 @@ std::unique_ptr<RenderContext> RenderContextGLImpl::MakeContext(
// Skia also reports crashes on PowerVR when drawing somewhere between
// 2^14 and 2^15 instances.
//
// Limit the maximum number of instances we issue per-draw-call on these
// devices to a safe value, far below the observed crash thresholds.
capabilities.maxSupportedInstancesPerDrawCommand = 999;
// We have observed Adreno 308 crash when drawing too many instances
// spread across any number of draw calls. Breaking them up with glFlush
// appears to fix the crashes.
//
// Limit the maximum number of instances we issue per flush on these
// devices, splitting up draw calls if needed.
capabilities.maxSupportedInstancesPerFlush = (1u << 13) - 1u;
}
else
{
capabilities.maxSupportedInstancesPerDrawCommand = ~0u;
capabilities.maxSupportedInstancesPerFlush = ~0u;
}
// Our baseline feature set is GLES 3.0. Capabilities from newer context

View File

@@ -10,6 +10,36 @@
namespace rive::gpu
{
// Returns the texture that holds a copy of the render target's color for
// blending, allocating it on first use.
GLuint RenderTargetGL::dstColorTexture()
{
    if (m_dstColorTexture == 0)
    {
        // First request: assign a freshly constructed glutils::Texture and
        // give it a single RGBA8 level matching the render target's
        // dimensions.
        // NOTE: This leaves texture unit 0 active with the new texture bound
        // to GL_TEXTURE_2D.
        m_dstColorTexture = glutils::Texture();
        const GLuint textureID = m_dstColorTexture;
        glActiveTexture(GL_TEXTURE0);
        glBindTexture(GL_TEXTURE_2D, textureID);
        glTexStorage2D(GL_TEXTURE_2D, 1, GL_RGBA8, width(), height());
    }
    return m_dstColorTexture;
}
// Binds the framebuffer whose color attachment is dstColorTexture() to
// "target". The framebuffer is set up the first time this is called.
void RenderTargetGL::bindDstColorFramebuffer(GLenum target)
{
    const bool needsSetup = (m_dstColorFramebuffer == 0);
    if (needsSetup)
    {
        m_dstColorFramebuffer = glutils::Framebuffer();
    }

    glBindFramebuffer(target, m_dstColorFramebuffer);

    if (needsSetup)
    {
        // Attach the dst color texture now that the new framebuffer is
        // bound. dstColorTexture() may itself allocate the texture here.
        const GLuint dstColorTex = dstColorTexture();
        glFramebufferTexture2D(target,
                               GL_COLOR_ATTACHMENT0,
                               GL_TEXTURE_2D,
                               dstColorTex,
                               0);
    }
}
TextureRenderTargetGL::~TextureRenderTargetGL() {}
void TextureRenderTargetGL::bindTextureFramebuffer(GLenum target)

View File

@@ -653,9 +653,13 @@ precision highp int;
#if @GLSL_VERSION < 310
// Polyfill ES 3.1+ methods.
INLINE half4 unpackUnorm4x8(uint u)
INLINE half4 polyfill_unpackUnorm4x8(uint u)
{
uint4 vals = uint4(u & 0xffu, (u >> 8) & 0xffu, (u >> 16) & 0xffu, u >> 24);
return float4(vals) * (1. / 255.);
}
// Use #define for unpackUnorm4x8 because some drivers (e.g., Adreno 308)
// incorrectly declare this builtin on ES 3.0, leading to compiler errors if we
// just declare it as a normal function.
#define unpackUnorm4x8 polyfill_unpackUnorm4x8
#endif

View File

@@ -15,7 +15,7 @@ namespace rive::gpu
class FeatherGM : public GM
{
public:
FeatherGM() : GM(1800, 2100)
FeatherGM() : GM(1756, 2048)
{
m_paint = TestingWindow::Get()->factory()->makeRenderPaint();
m_paint->color(0xffffffff);
@@ -25,7 +25,7 @@ public:
void onDraw(Renderer* renderer) override
{
renderer->scale(1.5f, 1.5f);
renderer->scale(1.463f, 1.463f);
for (int y = 0; y < 7; ++y)
{
renderer->save();