Vulkan: Optimize the vkImage layout when used as GL_image

If a vkImage has been used as a GL image in a compute shader and as
a GL texture in a fragment shader, no dependencies are needed for the
fragment shader and the other pre-fragment graphics shader stages
(vertex/tess/geom).
If we only assign the writable vkImage layout when running GL
executables that use image textures, we can specify more precise
read-only barriers when running read-only GL executables.

Bug: angleproject:6862
Change-Id: Iff37fdce13fea637751899253e535bf3f6663200
Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/3366014
Commit-Queue: Shahbaz Youssefi <syoussefi@chromium.org>
Reviewed-by: Charlie Lao <cclao@google.com>
This commit is contained in:
Min Zhang
2021-12-30 11:27:26 +08:00
committed by Angle LUCI CQ
parent 7e9e597ea1
commit 3d55cf0c0f
9 changed files with 513 additions and 16 deletions

View File

@@ -191,6 +191,7 @@ Arm Ltd.
Jian Li
Cheryl Wei
Guoxing Wu
Min Zhang
Broadcom Inc.
Gary Sweet

View File

@@ -218,6 +218,13 @@ bool IsRenderPassStartedAndUsesImage(const vk::RenderPassCommandBufferHelper &re
return renderPassCommands.started() && renderPassCommands.usesImage(image);
}
// Returns true only when |renderPassCommands| has been started and reports |image| as an image
// with a layout transition (see RenderPassCommandBufferHelper::isImageWithLayoutTransition).
bool IsRenderPassStartedAndTransitionsImageLayout(
    const vk::RenderPassCommandBufferHelper &renderPassCommands,
    vk::ImageHelper &image)
{
    // Without a started render pass there is nothing to query.
    if (!renderPassCommands.started())
    {
        return false;
    }

    return renderPassCommands.isImageWithLayoutTransition(image);
}
// When an Android surface is rotated differently than the device's native orientation, ANGLE must
// rotate gl_Position in the last pre-rasterization shader and gl_FragCoord in the fragment shader.
// Rotation of gl_Position is done in SPIR-V. The following are the rotation matrices for the
@@ -425,7 +432,9 @@ vk::ImageLayout GetImageReadLayout(TextureVk *textureVk,
{
vk::ImageHelper &image = textureVk->getImage();
if (textureVk->hasBeenBoundAsImage())
// If this texture has been bound as an image and the current program executable accesses
// images, consider this image's layout to be writable.
if (textureVk->hasBeenBoundAsImage() && executable->hasImages())
{
return pipelineType == PipelineType::Compute ? vk::ImageLayout::ComputeShaderWrite
: vk::ImageLayout::AllGraphicsShadersWrite;
@@ -5146,13 +5155,11 @@ angle::Result ContextVk::invalidateCurrentTextures(const gl::Context *context, g
ANGLE_TRY(updateActiveTextures(context, command));
// Take care of read-after-write hazards that require implicit synchronization.
if (command == gl::Command::Dispatch)
{
ANGLE_TRY(endRenderPassIfComputeReadAfterAttachmentWrite());
ANGLE_TRY(endRenderPassIfComputeAccessAfterGraphicsImageAccess());
}
}
return angle::Result::Continue;
}
@@ -5178,6 +5185,12 @@ angle::Result ContextVk::invalidateCurrentShaderResources(gl::Command command)
ANGLE_TRY(endRenderPassIfComputeReadAfterTransformFeedbackWrite());
}
// Take care of implicit layout transitions caused by compute program access-after-read.
if (hasImages && command == gl::Command::Dispatch)
{
ANGLE_TRY(endRenderPassIfComputeAccessAfterGraphicsImageAccess());
}
// If memory barrier has been issued but the command buffers haven't been flushed, make sure
// they get a chance to do so if necessary on program and storage buffer/image binding change.
const bool hasGLMemoryBarrierIssuedInCommandBuffers =
@@ -7272,14 +7285,41 @@ angle::Result ContextVk::endRenderPassIfComputeReadAfterTransformFeedbackWrite()
return angle::Result::Continue;
}
angle::Result ContextVk::endRenderPassIfComputeReadAfterAttachmentWrite()
// When textures/images that are bound and used by the current compute program have been
// accessed as sampled textures in the current render pass, the implicit layout transition of
// these textures/images in that render pass needs to be taken care of.
angle::Result ContextVk::endRenderPassIfComputeAccessAfterGraphicsImageAccess()
{
// Similar to flushCommandBuffersIfNecessary(), but using textures currently bound and used by
// the current (compute) program. This is to handle read-after-write hazards where the write
// originates from a framebuffer attachment.
const gl::ProgramExecutable *executable = mState.getProgramExecutable();
ASSERT(executable && executable->hasLinkedShaderStage(gl::ShaderType::Compute) &&
executable->hasTextures());
ASSERT(executable && executable->hasLinkedShaderStage(gl::ShaderType::Compute));
for (size_t imageUnitIndex : executable->getActiveImagesMask())
{
const gl::Texture *texture = mState.getImageUnit(imageUnitIndex).texture.get();
if (texture == nullptr)
{
continue;
}
TextureVk *textureVk = vk::GetImpl(texture);
if (texture->getType() == gl::TextureType::Buffer)
{
continue;
}
else
{
vk::ImageHelper &image = textureVk->getImage();
// This handles the implicit layout transition of this image in the render pass
// while it is bound and used by the current compute program.
if (IsRenderPassStartedAndTransitionsImageLayout(*mRenderPassCommands, image))
{
return flushCommandsAndEndRenderPass(
RenderPassClosureReason::GraphicsTextureImageAccessThenComputeAccess);
}
}
}
const gl::ActiveTexturesCache &textures = mState.getActiveTexturesCache();
const gl::ActiveTextureTypeArray &textureTypes = executable->getActiveSamplerTypes();
@@ -7298,11 +7338,22 @@ angle::Result ContextVk::endRenderPassIfComputeReadAfterAttachmentWrite()
ASSERT(textureVk != nullptr);
vk::ImageHelper &image = textureVk->getImage();
if (IsRenderPassStartedAndUsesImage(*mRenderPassCommands, image))
// Similar to flushCommandBuffersIfNecessary(), but using textures currently bound and used
// by the current (compute) program. This is to handle read-after-write hazards where the
// write originates from a framebuffer attachment.
if (image.hasRenderPassUsageFlag(vk::RenderPassUsage::RenderTargetAttachment) &&
IsRenderPassStartedAndUsesImage(*mRenderPassCommands, image))
{
return flushCommandsAndEndRenderPass(
RenderPassClosureReason::ImageAttachmentThenComputeRead);
}
// Take care of read image layout transitions that require implicit synchronization.
if (IsRenderPassStartedAndTransitionsImageLayout(*mRenderPassCommands, image))
{
return flushCommandsAndEndRenderPass(
RenderPassClosureReason::GraphicsTextureImageAccessThenComputeAccess);
}
}
return angle::Result::Continue;

View File

@@ -1166,7 +1166,7 @@ class ContextVk : public ContextImpl, public vk::Context, public MultisampleText
//
angle::Result endRenderPassIfTransformFeedbackBuffer(const vk::BufferHelper *buffer);
angle::Result endRenderPassIfComputeReadAfterTransformFeedbackWrite();
angle::Result endRenderPassIfComputeReadAfterAttachmentWrite();
angle::Result endRenderPassIfComputeAccessAfterGraphicsImageAccess();
void populateTransformFeedbackBufferSet(
size_t bufferCount,

View File

@@ -1393,11 +1393,13 @@ void CommandBufferHelperCommon::executeBarriers(const angle::FeaturesVk &feature
void CommandBufferHelperCommon::imageReadImpl(ContextVk *contextVk,
VkImageAspectFlags aspectFlags,
ImageLayout imageLayout,
ImageHelper *image)
ImageHelper *image,
bool *needLayoutTransition)
{
if (image->isReadBarrierNecessary(imageLayout))
{
updateImageLayoutAndBarrier(contextVk, image, aspectFlags, imageLayout);
*needLayoutTransition = true;
}
}
@@ -1474,7 +1476,8 @@ void OutsideRenderPassCommandBufferHelper::imageRead(ContextVk *contextVk,
ImageLayout imageLayout,
ImageHelper *image)
{
imageReadImpl(contextVk, aspectFlags, imageLayout, image);
bool needLayoutTransition = false;
imageReadImpl(contextVk, aspectFlags, imageLayout, image, &needLayoutTransition);
image->retain(&mResourceUseList);
}
@@ -1574,6 +1577,7 @@ angle::Result RenderPassCommandBufferHelper::reset(Context *context)
mColorAttachmentsCount = PackedAttachmentCount(0);
mDepthStencilAttachmentIndex = kAttachmentIndexInvalid;
mRenderPassUsedImages.clear();
mRenderPassImagesWithLayoutTransition.clear();
mImageOptimizeForPresent = nullptr;
// Reset and re-initialize the command buffers
@@ -1591,7 +1595,12 @@ void RenderPassCommandBufferHelper::imageRead(ContextVk *contextVk,
ImageLayout imageLayout,
ImageHelper *image)
{
imageReadImpl(contextVk, aspectFlags, imageLayout, image);
bool needLayoutTransition = false;
imageReadImpl(contextVk, aspectFlags, imageLayout, image, &needLayoutTransition);
if (needLayoutTransition && !isImageWithLayoutTransition(*image))
{
mRenderPassImagesWithLayoutTransition.insert(image->getImageSerial());
}
// As noted in the header we don't support multiple read layouts for Images.
// We allow duplicate uses in the RP to accommodate for normal GL sampler usage.
@@ -1613,6 +1622,10 @@ void RenderPassCommandBufferHelper::imageWrite(ContextVk *contextVk,
{
imageWriteImpl(contextVk, level, layerStart, layerCount, aspectFlags, imageLayout, aliasingMode,
image);
if (!isImageWithLayoutTransition(*image))
{
mRenderPassImagesWithLayoutTransition.insert(image->getImageSerial());
}
// When used as a storage image we allow for aliased writes.
if (aliasingMode == AliasingMode::Disallowed)

View File

@@ -1029,7 +1029,8 @@ class CommandBufferHelperCommon : angle::NonCopyable
void imageReadImpl(ContextVk *contextVk,
VkImageAspectFlags aspectFlags,
ImageLayout imageLayout,
ImageHelper *image);
ImageHelper *image,
bool *needLayoutTransition);
void imageWriteImpl(ContextVk *contextVk,
gl::LevelIndex level,
uint32_t layerStart,
@@ -1169,6 +1170,7 @@ class RenderPassCommandBufferHelper final : public CommandBufferHelperCommon
ImageHelper *resolveImage);
bool usesImage(const ImageHelper &image) const;
bool isImageWithLayoutTransition(const ImageHelper &image) const;
angle::Result flushToPrimary(Context *context,
PrimaryCommandBuffer *primary,
@@ -1337,6 +1339,10 @@ class RenderPassCommandBufferHelper final : public CommandBufferHelperCommon
// different layout.
angle::FlatUnorderedSet<ImageSerial, kFlatMapSize> mRenderPassUsedImages;
// Tracks the images for which a barrier/layout transition was recorded in this render pass
// (image reads that required a barrier, and image writes). Used to detect implicit image
// layout transitions.
angle::FlatUnorderedSet<ImageSerial, kFlatMapSize> mRenderPassImagesWithLayoutTransition;
// Array size of mColorAttachments
PackedAttachmentCount mColorAttachmentsCount;
// Attached render target images. Color and depth resolve images always come last.
@@ -2380,6 +2386,12 @@ ANGLE_INLINE bool RenderPassCommandBufferHelper::usesImage(const ImageHelper &im
return mRenderPassUsedImages.contains(image.getImageSerial());
}
// Reports whether |image|'s serial is present in mRenderPassImagesWithLayoutTransition.
ANGLE_INLINE bool RenderPassCommandBufferHelper::isImageWithLayoutTransition(
    const ImageHelper &image) const
{
    const ImageSerial serial = image.getImageSerial();
    return mRenderPassImagesWithLayoutTransition.contains(serial);
}
// A vector of image views, such as one per level or one per layer.
using ImageViewVector = std::vector<ImageView>;

View File

@@ -1444,6 +1444,7 @@ enum class RenderPassClosureReason
XfbWriteThenComputeRead,
XfbWriteThenIndirectDispatchBuffer,
ImageAttachmentThenComputeRead,
GraphicsTextureImageAccessThenComputeAccess,
GetQueryResult,
BeginNonRenderPassQuery,
EndNonRenderPassQuery,

View File

@@ -64,6 +64,7 @@
4092 WIN OPENGL : BufferDataOverflowTest.VertexBufferIntegerOverflow/* = SKIP
4092 WIN GLES : BufferDataOverflowTest.VertexBufferIntegerOverflow/* = SKIP
6064 WIN D3D11 : SimpleStateChangeTestES31.DrawThenChangeFBOThenDrawThenFlushInAnotherThreadThenDrawIndexed/* = SKIP
4404 WIN D3D11 : ComputeShaderTest.FSReadImageThenCSSample/* = SKIP
6101 WIN OPENGL INTEL : BlitFramebufferTest.OOBWrite/* = SKIP
1229184 WIN NVIDIA VULKAN : SimpleStateChangeTest.RedefineFramebufferTexture/* = SKIP
6173 WIN INTEL OPENGL : GLSLTest_ES31.BoolInInterfaceBlocks/* = SKIP
@@ -89,6 +90,7 @@
1252169 LINUX AMD OPENGL : ComputeShaderTest.ImageBufferMapWrite/* = SKIP
1252169 LINUX AMD OPENGL : ComputeShaderTest.BufferImageBufferMapWrite/* = SKIP
6585 LINUX AMD VULKAN : EXTBlendFuncExtendedDrawTest.FragData/* = SKIP
7300 LINUX INTEL OPENGL : ComputeShaderTest.DrawDispatchImageReadDraw/* = SKIP
// Nvidia
6115 NVIDIA OPENGL : GLSLTestLoops.DoWhileContinue/* = SKIP
@@ -105,6 +107,7 @@
6977 NVIDIA OpenGL : MipmapTestES31.GenerateMipmapWithDraw/* = SKIP
6977 NVIDIA GLES : MipmapTestES31.GenerateMipmapWithDraw/* = SKIP
6977 LINUX NVIDIA OpenGL : MipmapTestES31.GenerateLowerMipsWithDraw/* = SKIP
7301 LINUX NVIDIA OpenGL : CopyTexImageTest.RGBAToRGB/ES2_OpenGL_EmulateCopyTexImage2DFromRenderbuffers/* = SKIP
// Nvidia Vulkan
7236 NVIDIA VULKAN : GLSLTest_ES31.TessellationControlShaderMatrixCopyBug/* = SKIP
@@ -398,6 +401,7 @@
7142 PIXEL4ORXL GLES : GLSLTest.AliasingFunctionOutParamAndGlobal/* = SKIP
7213 PIXEL4ORXL GLES : BufferDataTestES3.BufferDataWithNullFollowedByMap/* = SKIP
7265 PIXEL4ORXL GLES : PbufferTest.BindTexImageAfterTexImage/* = SKIP
5981 PIXEL4ORXL GLES : ComputeShaderTest.DrawDispatchImageReadDraw/* = SKIP
5946 PIXEL4ORXL VULKAN : TransformFeedbackTestES32.PrimitivesWrittenAndGenerated/* = SKIP
5947 PIXEL4ORXL VULKAN : FramebufferFetchES31.DrawFetchBlitDrawFetch_NonCoherent/* = SKIP

View File

@@ -3843,6 +3843,7 @@ void main(void) {
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
EXPECT_GL_NO_ERROR();
glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
glUseProgram(csProgram);
glDispatchCompute(1, 1, 1);
glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT);
@@ -4862,6 +4863,194 @@ void main()
}
}
// Test that a texture which is sampled by a fragment shader, then bound as an image and loaded
// by a compute shader, can afterwards be sampled by the fragment shader again.
TEST_P(ComputeShaderTest, DrawDispatchImageReadDraw)
{
    constexpr char kVSSource[] = R"(#version 310 es
in vec4 a_position;
out vec2 v_texCoord;
void main()
{
gl_Position = vec4(a_position.xy, 0.0, 1.0);
v_texCoord = a_position.xy * 0.5 + vec2(0.5);
})";

    constexpr char kFSSource[] = R"(#version 310 es
precision mediump float;
uniform sampler2D u_tex2D;
in vec2 v_texCoord;
out vec4 out_FragColor;
void main()
{
out_FragColor = texture(u_tex2D, v_texCoord);
})";

    constexpr char kCSSource[] = R"(#version 310 es
layout(local_size_x=1, local_size_y=1, local_size_z=1) in;
layout(rgba32f, binding=0) readonly uniform highp image2D uIn;
layout(std140, binding=0) buffer buf {
vec4 outData;
};
void main()
{
outData = imageLoad(uIn, ivec2(gl_LocalInvocationID.xy));
})";

    GLfloat initValue[4] = {1.0, 1.0, 1.0, 1.0};

    // Step 1: Set up a simple 2D texture rendering loop.
    GLTexture texture;
    glBindTexture(GL_TEXTURE_2D, texture);
    glTexStorage2D(GL_TEXTURE_2D, 1, GL_RGBA32F, 1, 1);
    glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 1, 1, GL_RGBA, GL_FLOAT, initValue);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);

    // Four 2D vertices forming a full-screen quad when drawn as a triangle strip.
    GLBuffer vertexBuffer;
    GLfloat vertices[] = {-1, -1, 1, -1, -1, 1, 1, 1};
    glBindBuffer(GL_ARRAY_BUFFER, vertexBuffer);
    glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW);

    // 16 bytes: exactly one vec4, which is what the compute shader writes.
    GLBuffer ssbo;
    glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo);
    glBufferData(GL_SHADER_STORAGE_BUFFER, 16, nullptr, GL_STREAM_DRAW);
    glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);

    ANGLE_GL_PROGRAM(program, kVSSource, kFSSource);
    glUseProgram(program);

    GLint posLoc = glGetAttribLocation(program, "a_position");
    ASSERT_NE(-1, posLoc);

    glVertexAttribPointer(posLoc, 2, GL_FLOAT, GL_FALSE, 0, nullptr);
    glEnableVertexAttribArray(posLoc);
    ASSERT_GL_NO_ERROR();

    // Bind the texture as an image before the first draw, so the draw samples a texture that
    // has already been bound as an image.
    glBindImageTexture(0, texture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA32F);

    // The vertex buffer only holds 4 vertices; draw them as a strip instead of requesting 6
    // vertices as independent triangles, which would fetch past the end of the buffer.
    glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
    ASSERT_GL_NO_ERROR();

    // Step 2: load this image through compute.
    ANGLE_GL_COMPUTE_PROGRAM(csProgram, kCSSource);
    glUseProgram(csProgram);

    glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo);
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, ssbo);

    glDispatchCompute(1, 1, 1);
    EXPECT_GL_NO_ERROR();

    // Make the shader write to the SSBO visible to the buffer mapping below.
    glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
    glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo);
    const GLfloat *ptr = reinterpret_cast<const GLfloat *>(
        glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, 16, GL_MAP_READ_BIT));
    EXPECT_GL_NO_ERROR();
    // Do not dereference a failed mapping.
    ASSERT_NE(nullptr, ptr);

    for (unsigned int idx = 0; idx < 4; idx++)
    {
        EXPECT_EQ(1.0, *(ptr + idx));
    }

    glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
    EXPECT_GL_NO_ERROR();

    // Step 3: use the first program to sample the texture again.
    glUseProgram(program);
    glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
    ASSERT_GL_NO_ERROR();

    EXPECT_PIXEL_COLOR_EQ(1, 1, GLColor::white);
}
// Test that a fragment shader reading a texture as an image, followed by a compute shader
// sampling the same texture, works.
TEST_P(ComputeShaderTest, FSReadImageThenCSSample)
{
    constexpr char kVSSource[] = R"(#version 310 es
in vec4 a_position;
out vec2 v_texCoord;
void main()
{
gl_Position = vec4(a_position.xy, 0.0, 1.0);
v_texCoord = a_position.xy * 0.5 + vec2(0.5);;
})";

    constexpr char kFSSource[] = R"(#version 310 es
precision mediump float;
layout(rgba32f, binding=0) readonly uniform highp image2D uIn;
in vec2 v_texCoord;
out vec4 out_FragColor;
void main()
{
out_FragColor = imageLoad(uIn, ivec2(v_texCoord));
})";

    constexpr char kCSSource[] = R"(#version 310 es
layout(local_size_x=1, local_size_y=1, local_size_z=1) in;
layout(std140, binding=0) buffer buf {
vec4 outData;
};
uniform sampler2D u_tex2D;
void main()
{
outData = texture(u_tex2D, vec2(gl_LocalInvocationID.xy));
})";

    GLfloat initValue[4] = {1.0, 1.0, 1.0, 1.0};

    // Step 1: Set up a simple 2D texture rendering loop.
    GLTexture texture;
    glBindTexture(GL_TEXTURE_2D, texture);
    glTexStorage2D(GL_TEXTURE_2D, 1, GL_RGBA32F, 1, 1);
    glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 1, 1, GL_RGBA, GL_FLOAT, initValue);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);

    // Four 2D vertices forming a full-screen quad when drawn as a triangle strip.
    GLBuffer vertexBuffer;
    GLfloat vertices[] = {-1, -1, 1, -1, -1, 1, 1, 1};
    glBindBuffer(GL_ARRAY_BUFFER, vertexBuffer);
    glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW);

    // 16 bytes: exactly one vec4, which is what the compute shader writes.
    GLBuffer ssbo;
    glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo);
    glBufferData(GL_SHADER_STORAGE_BUFFER, 16, nullptr, GL_STREAM_DRAW);
    glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);

    ANGLE_GL_PROGRAM(program, kVSSource, kFSSource);
    glUseProgram(program);

    GLint posLoc = glGetAttribLocation(program, "a_position");
    ASSERT_NE(-1, posLoc);

    glVertexAttribPointer(posLoc, 2, GL_FLOAT, GL_FALSE, 0, nullptr);
    glEnableVertexAttribArray(posLoc);
    ASSERT_GL_NO_ERROR();

    // The fragment shader reads the texture through image unit 0.
    glBindImageTexture(0, texture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA32F);

    glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
    ASSERT_GL_NO_ERROR();

    // Step 2: sample the same texture through compute.
    ANGLE_GL_COMPUTE_PROGRAM(csProgram, kCSSource);
    glUseProgram(csProgram);

    glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo);
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, ssbo);

    glDispatchCompute(1, 1, 1);
    EXPECT_GL_NO_ERROR();

    // Make the shader write to the SSBO visible to the buffer mapping below.
    glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
    glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo);
    const GLfloat *ptr = reinterpret_cast<const GLfloat *>(
        glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, 16, GL_MAP_READ_BIT));
    EXPECT_GL_NO_ERROR();
    // Do not dereference a failed mapping.
    ASSERT_NE(nullptr, ptr);

    for (unsigned int idx = 0; idx < 4; idx++)
    {
        EXPECT_EQ(1.0, *(ptr + idx));
    }

    glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
    EXPECT_GL_NO_ERROR();
}
GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(ComputeShaderTest);
ANGLE_INSTANTIATE_TEST_ES31(ComputeShaderTest);

View File

@@ -4527,6 +4527,232 @@ TEST_P(VulkanPerformanceCounterTest, UniformUpdatesHitDescriptorSetCache)
EXPECT_EQ(expectedCacheMisses, actualCacheMisses);
}
// Test a texture that is sampled by a fragment shader, then loaded as an image by a compute
// shader, followed by a draw with a fragment shader that does not use the texture. The texture
// is bound as an image before the first draw, and the whole sequence is expected to use
// exactly two render passes.
TEST_P(VulkanPerformanceCounterTest_ES31, DrawDispatchImageReadDrawWithEndRP)
{
constexpr char kVSSource[] = R"(#version 310 es
in vec4 a_position;
out vec2 v_texCoord;
void main()
{
gl_Position = vec4(a_position.xy, 0.0, 1.0);
v_texCoord = a_position.xy * 0.5 + vec2(0.5);
})";
// Fragment shader that samples the texture.
constexpr char kFSSource[] = R"(#version 310 es
precision mediump float;
uniform sampler2D u_tex2D;
in vec2 v_texCoord;
out vec4 out_FragColor;
void main()
{
out_FragColor = texture(u_tex2D, v_texCoord);
})";
// Fragment shader that outputs a constant color and does not touch the texture.
constexpr char kFSSource1[] = R"(#version 310 es
precision mediump float;
out vec4 out_FragColor;
void main()
{
out_FragColor = vec4(1.0);
})";
// Compute shader that loads one texel from the image and stores it in the SSBO.
constexpr char kCSSource[] = R"(#version 310 es
layout(local_size_x=1, local_size_y=1, local_size_z=1) in;
layout(rgba32f, binding=0) readonly uniform highp image2D uIn;
layout(std140, binding=0) buffer buf {
vec4 outData;
};
void main()
{
outData = imageLoad(uIn, ivec2(gl_LocalInvocationID.xy));
})";
GLfloat initValue[4] = {1.0, 1.0, 1.0, 1.0};
// Step 1: Set up a simple 2D texture rendering loop.
GLTexture texture;
glBindTexture(GL_TEXTURE_2D, texture);
glTexStorage2D(GL_TEXTURE_2D, 1, GL_RGBA32F, 1, 1);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 1, 1, GL_RGBA, GL_FLOAT, initValue);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
// Vertex data: four 2D positions (8 floats).
GLBuffer vertexBuffer;
GLfloat vertices[] = {-1, -1, 1, -1, -1, 1, 1, 1};
glBindBuffer(GL_ARRAY_BUFFER, vertexBuffer);
glBufferData(GL_ARRAY_BUFFER, 8 * sizeof(GLfloat), vertices, GL_STATIC_DRAW);
ANGLE_GL_PROGRAM(program, kVSSource, kFSSource);
glUseProgram(program);
GLint posLoc = glGetAttribLocation(program, "a_position");
ASSERT_NE(-1, posLoc);
glVertexAttribPointer(posLoc, 2, GL_FLOAT, GL_FALSE, 0, nullptr);
glEnableVertexAttribArray(posLoc);
ASSERT_GL_NO_ERROR();
// Bind the texture to image unit 0 before the first draw.
glBindImageTexture(0, texture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA32F);
// 16-byte SSBO: one vec4, matching what the compute shader writes.
GLBuffer ssbo;
glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo);
glBufferData(GL_SHADER_STORAGE_BUFFER, 16, nullptr, GL_STREAM_DRAW);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
// This is actually suboptimal, and ideally only one render pass should be necessary.
uint32_t expectedRenderPassCount = getPerfCounters().renderPasses + 2;
// NOTE(review): the vertex buffer above holds 4 vertices but 6 are drawn as GL_TRIANGLES
// here and in step 3 -- confirm this is intended.
glDrawArrays(GL_TRIANGLES, 0, 6);
ASSERT_GL_NO_ERROR();
// Step 2: load this image through compute.
ANGLE_GL_COMPUTE_PROGRAM(csProgram, kCSSource);
glUseProgram(csProgram);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, ssbo);
glDispatchCompute(1, 1, 1);
EXPECT_GL_NO_ERROR();
// Map the SSBO and verify the compute shader read back the initial texel value.
// NOTE(review): no glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT) precedes this mapping and
// the buffer is never unmapped -- confirm whether that is deliberate.
glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo);
const GLfloat *ptr = reinterpret_cast<const GLfloat *>(
glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, 16, GL_MAP_READ_BIT));
EXPECT_GL_NO_ERROR();
for (unsigned int idx = 0; idx < 4; idx++)
{
EXPECT_EQ(1.0, *(ptr + idx));
}
// Step 3: draw with a program whose fragment shader does not use the texture.
ANGLE_GL_PROGRAM(program2, kVSSource, kFSSource1);
glUseProgram(program2);
glDrawArrays(GL_TRIANGLES, 0, 6);
ASSERT_GL_NO_ERROR();
// The render pass counter must have advanced by exactly the expected amount.
uint32_t actualRenderPassCount = getPerfCounters().renderPasses;
EXPECT_EQ(expectedRenderPassCount, actualRenderPassCount);
}
// Test a texture that is sampled by a fragment shader, followed by glReadPixels, then sampled
// again, loaded as an image by a compute shader, and finally followed by a draw with a fragment
// shader that does not use the texture. Here the texture is only bound as an image right before
// the dispatch.
TEST_P(VulkanPerformanceCounterTest_ES31, DrawDispatchImageReadDrawWithoutEndRP)
{
constexpr char kVSSource[] = R"(#version 310 es
in vec4 a_position;
out vec2 v_texCoord;
void main()
{
gl_Position = vec4(a_position.xy, 0.0, 1.0);
v_texCoord = a_position.xy * 0.5 + vec2(0.5);
})";
// Fragment shader that samples the texture.
constexpr char kFSSource[] = R"(#version 310 es
precision mediump float;
uniform sampler2D u_tex2D;
in vec2 v_texCoord;
out vec4 out_FragColor;
void main()
{
out_FragColor = texture(u_tex2D, v_texCoord);
})";
// Fragment shader that outputs a constant color and does not touch the texture.
constexpr char kFSSource1[] = R"(#version 310 es
precision mediump float;
out vec4 out_FragColor;
void main()
{
out_FragColor = vec4(1.0);
})";
// Compute shader that loads one texel from the image and stores it in the SSBO.
constexpr char kCSSource[] = R"(#version 310 es
layout(local_size_x=1, local_size_y=1, local_size_z=1) in;
layout(rgba32f, binding=0) readonly uniform highp image2D uIn;
layout(std140, binding=0) buffer buf {
vec4 outData;
};
void main()
{
outData = imageLoad(uIn, ivec2(gl_LocalInvocationID.xy));
})";
GLfloat initValue[4] = {1.0, 1.0, 1.0, 1.0};
// Step 1: Set up a simple 2D texture rendering loop.
GLTexture texture;
glBindTexture(GL_TEXTURE_2D, texture);
glTexStorage2D(GL_TEXTURE_2D, 1, GL_RGBA32F, 1, 1);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 1, 1, GL_RGBA, GL_FLOAT, initValue);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
// Vertex data: four 2D positions (8 floats).
GLBuffer vertexBuffer;
GLfloat vertices[] = {-1, -1, 1, -1, -1, 1, 1, 1};
glBindBuffer(GL_ARRAY_BUFFER, vertexBuffer);
glBufferData(GL_ARRAY_BUFFER, 8 * sizeof(GLfloat), vertices, GL_STATIC_DRAW);
// 16-byte SSBO: one vec4, matching what the compute shader writes.
GLBuffer ssbo;
glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo);
glBufferData(GL_SHADER_STORAGE_BUFFER, 16, nullptr, GL_STREAM_DRAW);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
ANGLE_GL_PROGRAM(program, kVSSource, kFSSource);
glUseProgram(program);
GLint posLoc = glGetAttribLocation(program, "a_position");
ASSERT_NE(-1, posLoc);
glVertexAttribPointer(posLoc, 2, GL_FLOAT, GL_FALSE, 0, nullptr);
glEnableVertexAttribArray(posLoc);
ASSERT_GL_NO_ERROR();
// NOTE(review): the vertex buffer above holds 4 vertices but 6 are drawn as GL_TRIANGLES
// in every draw of this test -- confirm this is intended.
glDrawArrays(GL_TRIANGLES, 0, 6);
ASSERT_GL_NO_ERROR();
// Call glReadPixels before sampling getPerfCounters().renderPasses below, so the counter
// baseline is taken after the first draw has been read back.
std::vector<GLColor> actualColors(1);
glReadPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, actualColors.data());
// Ideally, the following "FS sample + CS image load + FS something" sequence would be
// handled in one render pass.
// Currently, only the first part ("FS sample + CS image load") is kept in one render
// pass; the final FS draw starts a new render pass, which needs to be optimized
// further.
uint32_t expectedRenderPassCount = getPerfCounters().renderPasses + 2;
// At this point the texture needs no layout transition.
glDrawArrays(GL_TRIANGLES, 0, 6);
ASSERT_GL_NO_ERROR();
// Step 2: load this image through compute. The texture is bound as an image only now.
ANGLE_GL_COMPUTE_PROGRAM(csProgram, kCSSource);
glUseProgram(csProgram);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, ssbo);
glBindImageTexture(0, texture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA32F);
glDispatchCompute(1, 1, 1);
EXPECT_GL_NO_ERROR();
// Step 3: draw with a program whose fragment shader does not use the texture.
ANGLE_GL_PROGRAM(program2, kVSSource, kFSSource1);
glUseProgram(program2);
glDrawArrays(GL_TRIANGLES, 0, 6);
ASSERT_GL_NO_ERROR();
// The render pass counter must have advanced by exactly the expected amount.
uint32_t actualRenderPassCount = getPerfCounters().renderPasses;
EXPECT_EQ(expectedRenderPassCount, actualRenderPassCount);
}
// Verify a mid-render pass clear of a newly enabled attachment uses LOAD_OP_CLEAR.
TEST_P(VulkanPerformanceCounterTest, DisableThenMidRenderPassClear)
{