D3D11: Do not specialize HLSL for multisampled rendering

Pass the required state as a driver uniform instead of
triggering fragment shader recompilation at draw calls.

Bug: angleproject:8097
Bug: angleproject:8131
Fixed: chromium:1468993
Change-Id: I15825c0522b29d48eb2c3b341ba96f139ef26460
Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/4742579
Reviewed-by: Geoff Lang <geofflang@chromium.org>
Commit-Queue: Alexey Knyazev <lexa.knyazev@gmail.com>
This commit is contained in:
Alexey Knyazev
2023-08-02 00:00:00 +00:00
committed by Angle LUCI CQ
parent cb9dd1c9b9
commit 335c6b86d7
8 changed files with 62 additions and 102 deletions

View File

@@ -721,14 +721,6 @@ void OutputHLSL::header(TInfoSinkBase &out,
out << flatEvaluateFunction.functionDefinition << "\n";
}
}
if (!mSampleEvaluateFunctions.empty())
{
out << "\n// EvaluateAttributeAtSample functions\n\n";
for (const auto &sampleEvaluateFunction : mSampleEvaluateFunctions)
{
out << sampleEvaluateFunction.functionDefinition << "\n";
}
}
if (mUsesDiscardRewriting)
{
@@ -921,6 +913,7 @@ void OutputHLSL::header(TInfoSinkBase &out,
if (mOutputType == SH_HLSL_4_1_OUTPUT)
{
out << " uint dx_Misc : packoffset(c2.w);\n";
unsigned int registerIndex = 4;
mResourcesHLSL->samplerMetadataUniforms(out, registerIndex);
// Sampler metadata struct must be two 4-vec, 32 bytes.
@@ -2824,12 +2817,18 @@ bool OutputHLSL::visitAggregate(Visit visit, TIntermAggregate *node)
case EOpInterpolateAtSample:
{
TIntermTyped *interpolantNode = (*(node->getSequence()))[0]->getAsTyped();
const TString &functionName =
IsFlatInterpolant(interpolantNode)
? addFlatEvaluateFunction(interpolantNode->getType(),
*StaticType::GetBasic<EbtInt, EbpUndefined, 1>())
: addSampleEvaluateFunction(interpolantNode->getType());
outputTriplet(out, visit, (functionName + "(").c_str(), ", ", ")");
if (!IsFlatInterpolant(interpolantNode))
{
mUsesNumSamples = true;
outputTriplet(out, visit, "EvaluateAttributeAtSample(", ", clamp(",
", 0, gl_NumSamples - 1))");
}
else
{
const TString &functionName = addFlatEvaluateFunction(
interpolantNode->getType(), *StaticType::GetBasic<EbtInt, EbpUndefined, 1>());
outputTriplet(out, visit, (functionName + "(").c_str(), ", ", ")");
}
break;
}
case EOpInterpolateAtOffset:
@@ -3914,43 +3913,6 @@ TString OutputHLSL::addFlatEvaluateFunction(const TType &type, const TType &para
return function.functionName;
}
// Returns the name of the HLSL helper that evaluates an interpolant of the given type at a
// sample index. The helper is generated and recorded in mSampleEvaluateFunctions the first
// time a type is requested; later requests for an equal type reuse the recorded name.
TString OutputHLSL::addSampleEvaluateFunction(const TType &type)
{
    // Reuse a previously generated helper for this type, if any.
    for (const auto &existing : mSampleEvaluateFunctions)
    {
        if (existing.type == type)
        {
            return existing.functionName;
        }
    }

    const TString &typeName = TypeString(type);

    SampleEvaluateFunction helper;
    helper.type         = type;
    helper.functionName = "angle_eval_sample_" + typeName;

    // Without multisample buffers the input varying has to be evaluated at the
    // pixel center, so the helper body is chosen by the ANGLE_MULTISAMPLING
    // define: it forwards to EvaluateAttributeAtSample when the define is
    // present and returns the interpolant unchanged otherwise.
    TInfoSinkBase definition;
    definition << typeName << " " << helper.functionName << "(" << typeName << " i, uint s)\n"
               << "{\n"
               << "#ifdef ANGLE_MULTISAMPLING\n"
               << " return EvaluateAttributeAtSample(i, s);\n"
               << "#else\n"
               << " return i;\n"
               << "#endif\n"
               << "}\n";
    helper.functionDefinition = definition.c_str();

    mSampleEvaluateFunctions.push_back(helper);
    return helper.functionName;
}
void OutputHLSL::ensureStructDefined(const TType &type)
{
const TStructure *structure = type.getStruct();

View File

@@ -151,7 +151,6 @@ class OutputHLSL : public TIntermTraverser
TString addArrayAssignmentFunction(const TType &type);
TString addArrayConstructIntoFunction(const TType &type);
TString addFlatEvaluateFunction(const TType &type, const TType &parameterType);
TString addSampleEvaluateFunction(const TType &type);
// Ensures if the type is a struct, the struct is defined
void ensureStructDefined(const TType &type);
@@ -286,12 +285,6 @@ class OutputHLSL : public TIntermTraverser
};
std::vector<FlatEvaluateFunction> mFlatEvaluateFunctions;
struct SampleEvaluateFunction : public HelperFunction
{
TType type;
};
std::vector<SampleEvaluateFunction> mSampleEvaluateFunctions;
sh::WorkGroupSize mWorkGroupSize;
PerformanceDiagnostics *mPerfDiagnostics;

View File

@@ -320,7 +320,7 @@ std::string DynamicHLSL::generatePixelShaderForOutputSignature(
const std::vector<PixelShaderOutputVariable> &outputVariables,
FragDepthUsage fragDepthUsage,
bool usesSampleMask,
const std::pair<bool, const std::vector<GLenum>> &outputLayoutKey,
const std::vector<GLenum> &outputLayout,
const std::vector<ShaderStorageBlock> &shaderStorageBlocks,
size_t baseUAVRegister) const
{
@@ -342,9 +342,6 @@ std::string DynamicHLSL::generatePixelShaderForOutputSignature(
}
}();
const bool multisampling = outputLayoutKey.first;
const std::vector<GLenum> &outputLayout = outputLayoutKey.second;
std::ostringstream declarationStream;
std::ostringstream copyStream;
@@ -398,11 +395,11 @@ std::string DynamicHLSL::generatePixelShaderForOutputSignature(
copyStream << " output.gl_Depth = gl_Depth; \n";
}
// Do not write to SV_Coverage when rendering to single-sampled targets
if (multisampling && usesSampleMask)
if (usesSampleMask)
{
declarationStream << " uint sampleMask : SV_Coverage;\n";
copyStream << " output.sampleMask = gl_SampleMask[0];\n";
// Ignore gl_SampleMask[0] value when rendering to a single-sampled framebuffer
copyStream << " output.sampleMask = (dx_Misc & 1) ? gl_SampleMask[0] : 0xFFFFFFFFu;\n";
}
declarationStream << "};\n"
@@ -416,11 +413,6 @@ std::string DynamicHLSL::generatePixelShaderForOutputSignature(
std::string pixelHLSL(sourceShader);
if (multisampling)
{
pixelHLSL.insert(0, "#define ANGLE_MULTISAMPLING\n");
}
bool success =
angle::ReplaceSubstring(&pixelHLSL, PIXEL_OUTPUT_STUB_STRING, declarationStream.str());
ASSERT(success);

View File

@@ -160,7 +160,7 @@ class DynamicHLSL : angle::NonCopyable
const std::vector<PixelShaderOutputVariable> &outputVariables,
FragDepthUsage fragDepthUsage,
bool usesSampleMask,
const std::pair<bool, const std::vector<GLenum>> &outputLayoutKey,
const std::vector<GLenum> &outputLayout,
const std::vector<rx::ShaderStorageBlock> &shaderStorageBlocks,
size_t baseUAVRegister) const;
std::string generateShaderForImage2DBindSignature(

View File

@@ -90,19 +90,17 @@ size_t GetMaxOutputIndex(const std::vector<PixelShaderOutputVariable> &shaderOut
}
// Fills *outputLayoutOut with the default pixel shader output layout derived from the shader's
// output variables: the layout is cleared and, when at least one output variable exists, it is
// assigned maxIndex + 1 entries of GL_COLOR_ATTACHMENT0 + the first variable's output location,
// where maxIndex comes from GetMaxOutputIndex for that location.
// NOTE(review): this listing appears to interleave the pre-change (std::pair-based, with a
// multisampling flag) and post-change (plain std::vector) lines of the diff - confirm against
// the actual file before relying on either signature.
void GetDefaultOutputLayoutFromShader(
bool multisampling,
const std::vector<PixelShaderOutputVariable> &shaderOutputVars,
std::pair<bool, std::vector<GLenum>> *outputLayoutOut)
std::vector<GLenum> *outputLayoutOut)
{
outputLayoutOut->first = multisampling;
outputLayoutOut->second.clear();
outputLayoutOut->clear();
if (!shaderOutputVars.empty())
{
// Only the first output variable's location is consulted here.
size_t location = shaderOutputVars[0].outputLocation;
size_t maxIndex = GetMaxOutputIndex(shaderOutputVars, location);
outputLayoutOut->second.assign(maxIndex + 1,
GL_COLOR_ATTACHMENT0 + static_cast<unsigned int>(location));
outputLayoutOut->assign(maxIndex + 1,
GL_COLOR_ATTACHMENT0 + static_cast<unsigned int>(location));
}
}
@@ -682,9 +680,8 @@ bool ProgramD3D::VertexExecutable::matchesSignature(const Signature &signature)
return true;
}
// Constructs a pixel executable entry from an output signature and a shader executable
// pointer; both are stored in the corresponding members by the initializer list.
// NOTE(review): two signatures are shown because this listing appears to interleave the
// pre-change (std::pair-based) and post-change (std::vector-based) lines of the diff.
ProgramD3D::PixelExecutable::PixelExecutable(
const std::pair<bool, const std::vector<GLenum>> &outputSignature,
ShaderExecutableD3D *shaderExecutable)
ProgramD3D::PixelExecutable::PixelExecutable(const std::vector<GLenum> &outputSignature,
ShaderExecutableD3D *shaderExecutable)
: mOutputSignature(outputSignature), mShaderExecutable(shaderExecutable)
{}
@@ -1229,7 +1226,6 @@ angle::Result ProgramD3D::loadBinaryShaderExecutables(d3d::Context *contextD3D,
size_t pixelShaderCount = stream->readInt<size_t>();
for (size_t pixelShaderIndex = 0; pixelShaderIndex < pixelShaderCount; pixelShaderIndex++)
{
bool multisampling = stream->readBool();
size_t outputCount = stream->readInt<size_t>();
std::vector<GLenum> outputs(outputCount);
for (size_t outputIndex = 0; outputIndex < outputCount; outputIndex++)
@@ -1252,8 +1248,8 @@ angle::Result ProgramD3D::loadBinaryShaderExecutables(d3d::Context *contextD3D,
}
// add new binary
mPixelExecutables.push_back(std::unique_ptr<PixelExecutable>(
new PixelExecutable({multisampling, outputs}, shaderExecutable)));
mPixelExecutables.push_back(
std::unique_ptr<PixelExecutable>(new PixelExecutable(outputs, shaderExecutable)));
stream->skip(pixelShaderSize);
}
@@ -1503,8 +1499,7 @@ void ProgramD3D::save(const gl::Context *context, gl::BinaryOutputStream *stream
{
PixelExecutable *pixelExecutable = mPixelExecutables[pixelExecutableIndex].get();
stream->writeBool(pixelExecutable->outputSignature().first);
const std::vector<GLenum> &outputs = pixelExecutable->outputSignature().second;
const std::vector<GLenum> &outputs = pixelExecutable->outputSignature();
stream->writeInt(outputs.size());
for (size_t outputIndex = 0; outputIndex < outputs.size(); outputIndex++)
{
@@ -1767,9 +1762,7 @@ class ProgramD3D::GetPixelExecutableTask : public ProgramD3D::GetExecutableTask
// Rebuilds mPixelShaderOutputLayoutCache from the shader's own outputs (mPixelShaderKey) via
// GetDefaultOutputLayoutFromShader, then refreshes the cached pixel executable index.
// NOTE(review): both the three-argument call (passing mUsesSampleMask) and the two-argument
// call are shown; this looks like the diff's old and new lines interleaved - confirm.
void ProgramD3D::updateCachedOutputLayoutFromShader()
{
// Assume multisampled rendering if a shader writes to gl_SampleMask.
GetDefaultOutputLayoutFromShader(mUsesSampleMask, mPixelShaderKey,
&mPixelShaderOutputLayoutCache);
GetDefaultOutputLayoutFromShader(mPixelShaderKey, &mPixelShaderOutputLayoutCache);
updateCachedPixelExecutableIndex();
}
@@ -3193,8 +3186,7 @@ void ProgramD3D::updateCachedInputLayout(UniqueSerial associatedSerial, const gl
void ProgramD3D::updateCachedOutputLayout(const gl::Context *context,
const gl::Framebuffer *framebuffer)
{
mPixelShaderOutputLayoutCache.first = framebuffer->getSamples(context) != 0;
mPixelShaderOutputLayoutCache.second.clear();
mPixelShaderOutputLayoutCache.clear();
FramebufferD3D *fboD3D = GetImplAs<FramebufferD3D>(framebuffer);
const auto &colorbuffers = fboD3D->getColorAttachmentsForRender(context);
@@ -3210,12 +3202,12 @@ void ProgramD3D::updateCachedOutputLayout(const gl::Context *context,
size_t maxIndex = binding != GL_NONE ? GetMaxOutputIndex(mPixelShaderKey,
binding - GL_COLOR_ATTACHMENT0)
: 0;
mPixelShaderOutputLayoutCache.second.insert(mPixelShaderOutputLayoutCache.second.end(),
maxIndex + 1, binding);
mPixelShaderOutputLayoutCache.insert(mPixelShaderOutputLayoutCache.end(), maxIndex + 1,
binding);
}
else
{
mPixelShaderOutputLayoutCache.second.push_back(GL_NONE);
mPixelShaderOutputLayoutCache.push_back(GL_NONE);
}
}

View File

@@ -412,24 +412,21 @@ class ProgramD3D : public ProgramImpl
// Pairs a shader executable with the output signature it is associated with, so that an
// executable can be looked up by comparing signatures.
// NOTE(review): std::pair-based and std::vector-based variants of each member appear side by
// side; this listing seems to interleave the diff's removed and added lines - confirm.
class PixelExecutable
{
public:
PixelExecutable(const std::pair<bool, const std::vector<GLenum>> &outputSignature,
PixelExecutable(const std::vector<GLenum> &outputSignature,
ShaderExecutableD3D *shaderExecutable);
~PixelExecutable();
// Returns true when the stored output signature compares equal to the given one.
bool matchesSignature(const std::pair<bool, const std::vector<GLenum>> &signature) const
bool matchesSignature(const std::vector<GLenum> &signature) const
{
return mOutputSignature == signature;
}
// Read-only access to the stored output signature.
const std::pair<bool, const std::vector<GLenum>> &outputSignature() const
{
return mOutputSignature;
}
const std::vector<GLenum> &outputSignature() const { return mOutputSignature; }
// Returns the stored shader executable pointer.
ShaderExecutableD3D *shaderExecutable() const { return mShaderExecutable; }
private:
// Output signature stored by value; const, so it is fixed at construction.
const std::pair<bool, const std::vector<GLenum>> mOutputSignature;
const std::vector<GLenum> mOutputSignature;
ShaderExecutableD3D *mShaderExecutable;
};
@@ -582,7 +579,7 @@ class ProgramD3D : public ProgramImpl
gl::ShaderMap<gl::RangeUI> mUsedAtomicCounterRange;
// Cache for pixel shader output layout to save reallocations.
std::pair<bool, std::vector<GLenum>> mPixelShaderOutputLayoutCache;
std::vector<GLenum> mPixelShaderOutputLayoutCache;
Optional<size_t> mCachedPixelExecutableIndex;
AttribIndexArray mAttribLocationToD3DSemantic;

View File

@@ -565,6 +565,18 @@ bool ShaderConstants11::onClipDistancesEnabledChange(const uint32_t value)
return clipDistancesEnabledDirty;
}
// Synchronizes the multisampling bit (kPixelMiscMultisamplingMask) of the pixel constants'
// misc field with the requested state. When the stored bit differs, it is flipped, the
// fragment shader constants are marked dirty, and true is returned; otherwise nothing is
// modified and false is returned.
bool ShaderConstants11::onMultisamplingChange(bool multisampling)
{
    const bool currentlyMultisampling = (mPixel.misc & kPixelMiscMultisamplingMask) != 0;
    if (currentlyMultisampling == multisampling)
    {
        return false;
    }

    // The stored bit is known to differ from the request, so toggling it yields the new state.
    mPixel.misc ^= kPixelMiscMultisamplingMask;
    mShaderConstantsDirty.set(gl::ShaderType::Fragment);
    return true;
}
angle::Result ShaderConstants11::updateBuffer(const gl::Context *context,
Renderer11 *renderer,
gl::ShaderType shaderType,
@@ -1107,6 +1119,11 @@ void StateManager11::syncState(const gl::Context *context,
case gl::state::DIRTY_BIT_DRAW_FRAMEBUFFER_BINDING:
invalidateRenderTarget();
mFramebuffer11 = GetImplAs<Framebuffer11>(state.getDrawFramebuffer());
if (mShaderConstants.onMultisamplingChange(
state.getDrawFramebuffer()->getSamples(context) != 0))
{
invalidateDriverUniforms();
}
break;
case gl::state::DIRTY_BIT_VERTEX_ARRAY_BINDING:
invalidateVertexBuffer();

View File

@@ -55,6 +55,7 @@ class ShaderConstants11 : angle::NonCopyable
const gl::ImageUnit &imageUnit);
void onClipControlChange(bool lowerLeft, bool zeroToOne);
bool onClipDistancesEnabledChange(const uint32_t value);
bool onMultisamplingChange(bool multisampling);
angle::Result updateBuffer(const gl::Context *context,
Renderer11 *renderer,
@@ -104,16 +105,22 @@ class ShaderConstants11 : angle::NonCopyable
: depthRange{.0f},
viewCoords{.0f},
depthFront{.0f},
misc{0},
fragCoordOffset{.0f},
viewScale{.0f}
{}
float depthRange[4];
float viewCoords[4];
float depthFront[4];
float depthFront[3];
uint32_t misc;
float fragCoordOffset[2];
float viewScale[2];
};
// Packing information for pixel driver uniform's misc field:
// - 1 bit for whether multisampled rendering is used
// - 31 bits unused
static constexpr uint32_t kPixelMiscMultisamplingMask = 0x1;
static_assert(sizeof(Pixel) % 16u == 0, "D3D11 constant buffers must be multiples of 16 bytes");
struct Compute