mirror of
https://github.com/godotengine/godot-angle-static.git
synced 2026-01-06 02:09:55 +03:00
D3D11: Do not specialize HLSL for multisampled rendering
Pass the required state as a driver uniform instead of triggering fragment shader recompilation at draw calls.

Bug: angleproject:8097
Bug: angleproject:8131
Fixed: chromium:1468993
Change-Id: I15825c0522b29d48eb2c3b341ba96f139ef26460
Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/4742579
Reviewed-by: Geoff Lang <geofflang@chromium.org>
Commit-Queue: Alexey Knyazev <lexa.knyazev@gmail.com>
This commit is contained in:
committed by
Angle LUCI CQ
parent
cb9dd1c9b9
commit
335c6b86d7
@@ -721,14 +721,6 @@ void OutputHLSL::header(TInfoSinkBase &out,
|
||||
out << flatEvaluateFunction.functionDefinition << "\n";
|
||||
}
|
||||
}
|
||||
if (!mSampleEvaluateFunctions.empty())
|
||||
{
|
||||
out << "\n// EvaluateAttributeAtSample functions\n\n";
|
||||
for (const auto &sampleEvaluateFunction : mSampleEvaluateFunctions)
|
||||
{
|
||||
out << sampleEvaluateFunction.functionDefinition << "\n";
|
||||
}
|
||||
}
|
||||
|
||||
if (mUsesDiscardRewriting)
|
||||
{
|
||||
@@ -921,6 +913,7 @@ void OutputHLSL::header(TInfoSinkBase &out,
|
||||
|
||||
if (mOutputType == SH_HLSL_4_1_OUTPUT)
|
||||
{
|
||||
out << " uint dx_Misc : packoffset(c2.w);\n";
|
||||
unsigned int registerIndex = 4;
|
||||
mResourcesHLSL->samplerMetadataUniforms(out, registerIndex);
|
||||
// Sampler metadata struct must be two 4-vec, 32 bytes.
|
||||
@@ -2824,12 +2817,18 @@ bool OutputHLSL::visitAggregate(Visit visit, TIntermAggregate *node)
|
||||
case EOpInterpolateAtSample:
|
||||
{
|
||||
TIntermTyped *interpolantNode = (*(node->getSequence()))[0]->getAsTyped();
|
||||
const TString &functionName =
|
||||
IsFlatInterpolant(interpolantNode)
|
||||
? addFlatEvaluateFunction(interpolantNode->getType(),
|
||||
*StaticType::GetBasic<EbtInt, EbpUndefined, 1>())
|
||||
: addSampleEvaluateFunction(interpolantNode->getType());
|
||||
outputTriplet(out, visit, (functionName + "(").c_str(), ", ", ")");
|
||||
if (!IsFlatInterpolant(interpolantNode))
|
||||
{
|
||||
mUsesNumSamples = true;
|
||||
outputTriplet(out, visit, "EvaluateAttributeAtSample(", ", clamp(",
|
||||
", 0, gl_NumSamples - 1))");
|
||||
}
|
||||
else
|
||||
{
|
||||
const TString &functionName = addFlatEvaluateFunction(
|
||||
interpolantNode->getType(), *StaticType::GetBasic<EbtInt, EbpUndefined, 1>());
|
||||
outputTriplet(out, visit, (functionName + "(").c_str(), ", ", ")");
|
||||
}
|
||||
break;
|
||||
}
|
||||
case EOpInterpolateAtOffset:
|
||||
@@ -3914,43 +3913,6 @@ TString OutputHLSL::addFlatEvaluateFunction(const TType &type, const TType ¶
|
||||
return function.functionName;
|
||||
}
|
||||
|
||||
// Returns the name of the HLSL helper that evaluates an interpolant of the given type at a
// sample index. The helper is generated on first request for a type and recorded in
// mSampleEvaluateFunctions; later requests for the same type reuse the recorded name.
TString OutputHLSL::addSampleEvaluateFunction(const TType &type)
{
    // Reuse a helper that was already generated for this type.
    for (const SampleEvaluateFunction &existing : mSampleEvaluateFunctions)
    {
        if (existing.type == type)
        {
            return existing.functionName;
        }
    }

    const TString &typeName = TypeString(type);

    SampleEvaluateFunction helper;
    helper.type         = type;
    helper.functionName = "angle_eval_sample_" + typeName;

    // If multisample buffers are not available, the input varying
    // must be evaluated at the center of the pixel. Direct3D does
    // not support single-sampled rendering, thus specializing the
    // function is needed: the generated body only calls
    // EvaluateAttributeAtSample when ANGLE_MULTISAMPLING is defined
    // and returns the interpolant unchanged otherwise.
    TInfoSinkBase definition;
    definition << typeName << " " << helper.functionName << "(" << typeName << " i, uint s)\n";
    definition << "{\n";
    definition << "#ifdef ANGLE_MULTISAMPLING\n";
    definition << " return EvaluateAttributeAtSample(i, s);\n";
    definition << "#else\n";
    definition << " return i;\n";
    definition << "#endif\n";
    definition << "}\n";
    helper.functionDefinition = definition.c_str();

    mSampleEvaluateFunctions.push_back(helper);

    return helper.functionName;
}
|
||||
|
||||
void OutputHLSL::ensureStructDefined(const TType &type)
|
||||
{
|
||||
const TStructure *structure = type.getStruct();
|
||||
|
||||
@@ -151,7 +151,6 @@ class OutputHLSL : public TIntermTraverser
|
||||
TString addArrayAssignmentFunction(const TType &type);
|
||||
TString addArrayConstructIntoFunction(const TType &type);
|
||||
TString addFlatEvaluateFunction(const TType &type, const TType ¶meterType);
|
||||
TString addSampleEvaluateFunction(const TType &type);
|
||||
|
||||
// Ensures if the type is a struct, the struct is defined
|
||||
void ensureStructDefined(const TType &type);
|
||||
@@ -286,12 +285,6 @@ class OutputHLSL : public TIntermTraverser
|
||||
};
|
||||
std::vector<FlatEvaluateFunction> mFlatEvaluateFunctions;
|
||||
|
||||
// Bookkeeping entry for one generated sample-evaluation helper function.
// `type` is the interpolant type the helper was generated for; addSampleEvaluateFunction
// compares it against the requested type to reuse an existing helper.
struct SampleEvaluateFunction : public HelperFunction
|
||||
{
|
||||
TType type;
|
||||
};
|
||||
std::vector<SampleEvaluateFunction> mSampleEvaluateFunctions;
|
||||
|
||||
sh::WorkGroupSize mWorkGroupSize;
|
||||
|
||||
PerformanceDiagnostics *mPerfDiagnostics;
|
||||
|
||||
@@ -320,7 +320,7 @@ std::string DynamicHLSL::generatePixelShaderForOutputSignature(
|
||||
const std::vector<PixelShaderOutputVariable> &outputVariables,
|
||||
FragDepthUsage fragDepthUsage,
|
||||
bool usesSampleMask,
|
||||
const std::pair<bool, const std::vector<GLenum>> &outputLayoutKey,
|
||||
const std::vector<GLenum> &outputLayout,
|
||||
const std::vector<ShaderStorageBlock> &shaderStorageBlocks,
|
||||
size_t baseUAVRegister) const
|
||||
{
|
||||
@@ -342,9 +342,6 @@ std::string DynamicHLSL::generatePixelShaderForOutputSignature(
|
||||
}
|
||||
}();
|
||||
|
||||
const bool multisampling = outputLayoutKey.first;
|
||||
const std::vector<GLenum> &outputLayout = outputLayoutKey.second;
|
||||
|
||||
std::ostringstream declarationStream;
|
||||
std::ostringstream copyStream;
|
||||
|
||||
@@ -398,11 +395,11 @@ std::string DynamicHLSL::generatePixelShaderForOutputSignature(
|
||||
copyStream << " output.gl_Depth = gl_Depth; \n";
|
||||
}
|
||||
|
||||
// Do not write to SV_Coverage when rendering to single-sampled targets
|
||||
if (multisampling && usesSampleMask)
|
||||
if (usesSampleMask)
|
||||
{
|
||||
declarationStream << " uint sampleMask : SV_Coverage;\n";
|
||||
copyStream << " output.sampleMask = gl_SampleMask[0];\n";
|
||||
// Ignore gl_SampleMask[0] value when rendering to a single-sampled framebuffer
|
||||
copyStream << " output.sampleMask = (dx_Misc & 1) ? gl_SampleMask[0] : 0xFFFFFFFFu;\n";
|
||||
}
|
||||
|
||||
declarationStream << "};\n"
|
||||
@@ -416,11 +413,6 @@ std::string DynamicHLSL::generatePixelShaderForOutputSignature(
|
||||
|
||||
std::string pixelHLSL(sourceShader);
|
||||
|
||||
if (multisampling)
|
||||
{
|
||||
pixelHLSL.insert(0, "#define ANGLE_MULTISAMPLING\n");
|
||||
}
|
||||
|
||||
bool success =
|
||||
angle::ReplaceSubstring(&pixelHLSL, PIXEL_OUTPUT_STUB_STRING, declarationStream.str());
|
||||
ASSERT(success);
|
||||
|
||||
@@ -160,7 +160,7 @@ class DynamicHLSL : angle::NonCopyable
|
||||
const std::vector<PixelShaderOutputVariable> &outputVariables,
|
||||
FragDepthUsage fragDepthUsage,
|
||||
bool usesSampleMask,
|
||||
const std::pair<bool, const std::vector<GLenum>> &outputLayoutKey,
|
||||
const std::vector<GLenum> &outputLayout,
|
||||
const std::vector<rx::ShaderStorageBlock> &shaderStorageBlocks,
|
||||
size_t baseUAVRegister) const;
|
||||
std::string generateShaderForImage2DBindSignature(
|
||||
|
||||
@@ -90,19 +90,17 @@ size_t GetMaxOutputIndex(const std::vector<PixelShaderOutputVariable> &shaderOut
|
||||
}
|
||||
|
||||
void GetDefaultOutputLayoutFromShader(
|
||||
bool multisampling,
|
||||
const std::vector<PixelShaderOutputVariable> &shaderOutputVars,
|
||||
std::pair<bool, std::vector<GLenum>> *outputLayoutOut)
|
||||
std::vector<GLenum> *outputLayoutOut)
|
||||
{
|
||||
outputLayoutOut->first = multisampling;
|
||||
outputLayoutOut->second.clear();
|
||||
outputLayoutOut->clear();
|
||||
|
||||
if (!shaderOutputVars.empty())
|
||||
{
|
||||
size_t location = shaderOutputVars[0].outputLocation;
|
||||
size_t maxIndex = GetMaxOutputIndex(shaderOutputVars, location);
|
||||
outputLayoutOut->second.assign(maxIndex + 1,
|
||||
GL_COLOR_ATTACHMENT0 + static_cast<unsigned int>(location));
|
||||
outputLayoutOut->assign(maxIndex + 1,
|
||||
GL_COLOR_ATTACHMENT0 + static_cast<unsigned int>(location));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -682,9 +680,8 @@ bool ProgramD3D::VertexExecutable::matchesSignature(const Signature &signature)
|
||||
return true;
|
||||
}
|
||||
|
||||
ProgramD3D::PixelExecutable::PixelExecutable(
|
||||
const std::pair<bool, const std::vector<GLenum>> &outputSignature,
|
||||
ShaderExecutableD3D *shaderExecutable)
|
||||
ProgramD3D::PixelExecutable::PixelExecutable(const std::vector<GLenum> &outputSignature,
|
||||
ShaderExecutableD3D *shaderExecutable)
|
||||
: mOutputSignature(outputSignature), mShaderExecutable(shaderExecutable)
|
||||
{}
|
||||
|
||||
@@ -1229,7 +1226,6 @@ angle::Result ProgramD3D::loadBinaryShaderExecutables(d3d::Context *contextD3D,
|
||||
size_t pixelShaderCount = stream->readInt<size_t>();
|
||||
for (size_t pixelShaderIndex = 0; pixelShaderIndex < pixelShaderCount; pixelShaderIndex++)
|
||||
{
|
||||
bool multisampling = stream->readBool();
|
||||
size_t outputCount = stream->readInt<size_t>();
|
||||
std::vector<GLenum> outputs(outputCount);
|
||||
for (size_t outputIndex = 0; outputIndex < outputCount; outputIndex++)
|
||||
@@ -1252,8 +1248,8 @@ angle::Result ProgramD3D::loadBinaryShaderExecutables(d3d::Context *contextD3D,
|
||||
}
|
||||
|
||||
// add new binary
|
||||
mPixelExecutables.push_back(std::unique_ptr<PixelExecutable>(
|
||||
new PixelExecutable({multisampling, outputs}, shaderExecutable)));
|
||||
mPixelExecutables.push_back(
|
||||
std::unique_ptr<PixelExecutable>(new PixelExecutable(outputs, shaderExecutable)));
|
||||
|
||||
stream->skip(pixelShaderSize);
|
||||
}
|
||||
@@ -1503,8 +1499,7 @@ void ProgramD3D::save(const gl::Context *context, gl::BinaryOutputStream *stream
|
||||
{
|
||||
PixelExecutable *pixelExecutable = mPixelExecutables[pixelExecutableIndex].get();
|
||||
|
||||
stream->writeBool(pixelExecutable->outputSignature().first);
|
||||
const std::vector<GLenum> &outputs = pixelExecutable->outputSignature().second;
|
||||
const std::vector<GLenum> &outputs = pixelExecutable->outputSignature();
|
||||
stream->writeInt(outputs.size());
|
||||
for (size_t outputIndex = 0; outputIndex < outputs.size(); outputIndex++)
|
||||
{
|
||||
@@ -1767,9 +1762,7 @@ class ProgramD3D::GetPixelExecutableTask : public ProgramD3D::GetExecutableTask
|
||||
|
||||
void ProgramD3D::updateCachedOutputLayoutFromShader()
|
||||
{
|
||||
// Assume multisampled rendering if a shader writes to gl_SampleMask.
|
||||
GetDefaultOutputLayoutFromShader(mUsesSampleMask, mPixelShaderKey,
|
||||
&mPixelShaderOutputLayoutCache);
|
||||
GetDefaultOutputLayoutFromShader(mPixelShaderKey, &mPixelShaderOutputLayoutCache);
|
||||
updateCachedPixelExecutableIndex();
|
||||
}
|
||||
|
||||
@@ -3193,8 +3186,7 @@ void ProgramD3D::updateCachedInputLayout(UniqueSerial associatedSerial, const gl
|
||||
void ProgramD3D::updateCachedOutputLayout(const gl::Context *context,
|
||||
const gl::Framebuffer *framebuffer)
|
||||
{
|
||||
mPixelShaderOutputLayoutCache.first = framebuffer->getSamples(context) != 0;
|
||||
mPixelShaderOutputLayoutCache.second.clear();
|
||||
mPixelShaderOutputLayoutCache.clear();
|
||||
|
||||
FramebufferD3D *fboD3D = GetImplAs<FramebufferD3D>(framebuffer);
|
||||
const auto &colorbuffers = fboD3D->getColorAttachmentsForRender(context);
|
||||
@@ -3210,12 +3202,12 @@ void ProgramD3D::updateCachedOutputLayout(const gl::Context *context,
|
||||
size_t maxIndex = binding != GL_NONE ? GetMaxOutputIndex(mPixelShaderKey,
|
||||
binding - GL_COLOR_ATTACHMENT0)
|
||||
: 0;
|
||||
mPixelShaderOutputLayoutCache.second.insert(mPixelShaderOutputLayoutCache.second.end(),
|
||||
maxIndex + 1, binding);
|
||||
mPixelShaderOutputLayoutCache.insert(mPixelShaderOutputLayoutCache.end(), maxIndex + 1,
|
||||
binding);
|
||||
}
|
||||
else
|
||||
{
|
||||
mPixelShaderOutputLayoutCache.second.push_back(GL_NONE);
|
||||
mPixelShaderOutputLayoutCache.push_back(GL_NONE);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -412,24 +412,21 @@ class ProgramD3D : public ProgramImpl
|
||||
class PixelExecutable
|
||||
{
|
||||
public:
|
||||
PixelExecutable(const std::pair<bool, const std::vector<GLenum>> &outputSignature,
|
||||
PixelExecutable(const std::vector<GLenum> &outputSignature,
|
||||
ShaderExecutableD3D *shaderExecutable);
|
||||
~PixelExecutable();
|
||||
|
||||
bool matchesSignature(const std::pair<bool, const std::vector<GLenum>> &signature) const
|
||||
bool matchesSignature(const std::vector<GLenum> &signature) const
|
||||
{
|
||||
return mOutputSignature == signature;
|
||||
}
|
||||
|
||||
const std::pair<bool, const std::vector<GLenum>> &outputSignature() const
|
||||
{
|
||||
return mOutputSignature;
|
||||
}
|
||||
const std::vector<GLenum> &outputSignature() const { return mOutputSignature; }
|
||||
|
||||
ShaderExecutableD3D *shaderExecutable() const { return mShaderExecutable; }
|
||||
|
||||
private:
|
||||
const std::pair<bool, const std::vector<GLenum>> mOutputSignature;
|
||||
const std::vector<GLenum> mOutputSignature;
|
||||
ShaderExecutableD3D *mShaderExecutable;
|
||||
};
|
||||
|
||||
@@ -582,7 +579,7 @@ class ProgramD3D : public ProgramImpl
|
||||
gl::ShaderMap<gl::RangeUI> mUsedAtomicCounterRange;
|
||||
|
||||
// Cache for pixel shader output layout to save reallocations.
|
||||
std::pair<bool, std::vector<GLenum>> mPixelShaderOutputLayoutCache;
|
||||
std::vector<GLenum> mPixelShaderOutputLayoutCache;
|
||||
Optional<size_t> mCachedPixelExecutableIndex;
|
||||
|
||||
AttribIndexArray mAttribLocationToD3DSemantic;
|
||||
|
||||
@@ -565,6 +565,18 @@ bool ShaderConstants11::onClipDistancesEnabledChange(const uint32_t value)
|
||||
return clipDistancesEnabledDirty;
|
||||
}
|
||||
|
||||
bool ShaderConstants11::onMultisamplingChange(bool multisampling)
|
||||
{
|
||||
const bool multisamplingDirty =
|
||||
((mPixel.misc & kPixelMiscMultisamplingMask) != 0) != multisampling;
|
||||
if (multisamplingDirty)
|
||||
{
|
||||
mPixel.misc ^= kPixelMiscMultisamplingMask;
|
||||
mShaderConstantsDirty.set(gl::ShaderType::Fragment);
|
||||
}
|
||||
return multisamplingDirty;
|
||||
}
|
||||
|
||||
angle::Result ShaderConstants11::updateBuffer(const gl::Context *context,
|
||||
Renderer11 *renderer,
|
||||
gl::ShaderType shaderType,
|
||||
@@ -1107,6 +1119,11 @@ void StateManager11::syncState(const gl::Context *context,
|
||||
case gl::state::DIRTY_BIT_DRAW_FRAMEBUFFER_BINDING:
|
||||
invalidateRenderTarget();
|
||||
mFramebuffer11 = GetImplAs<Framebuffer11>(state.getDrawFramebuffer());
|
||||
if (mShaderConstants.onMultisamplingChange(
|
||||
state.getDrawFramebuffer()->getSamples(context) != 0))
|
||||
{
|
||||
invalidateDriverUniforms();
|
||||
}
|
||||
break;
|
||||
case gl::state::DIRTY_BIT_VERTEX_ARRAY_BINDING:
|
||||
invalidateVertexBuffer();
|
||||
|
||||
@@ -55,6 +55,7 @@ class ShaderConstants11 : angle::NonCopyable
|
||||
const gl::ImageUnit &imageUnit);
|
||||
void onClipControlChange(bool lowerLeft, bool zeroToOne);
|
||||
bool onClipDistancesEnabledChange(const uint32_t value);
|
||||
bool onMultisamplingChange(bool multisampling);
|
||||
|
||||
angle::Result updateBuffer(const gl::Context *context,
|
||||
Renderer11 *renderer,
|
||||
@@ -104,16 +105,22 @@ class ShaderConstants11 : angle::NonCopyable
|
||||
: depthRange{.0f},
|
||||
viewCoords{.0f},
|
||||
depthFront{.0f},
|
||||
misc{0},
|
||||
fragCoordOffset{.0f},
|
||||
viewScale{.0f}
|
||||
{}
|
||||
|
||||
float depthRange[4];
|
||||
float viewCoords[4];
|
||||
float depthFront[4];
|
||||
float depthFront[3];
|
||||
uint32_t misc;
|
||||
float fragCoordOffset[2];
|
||||
float viewScale[2];
|
||||
};
|
||||
// Packing information for pixel driver uniform's misc field:
|
||||
// - 1 bit for whether multisampled rendering is used
|
||||
// - 31 bits unused
|
||||
static constexpr uint32_t kPixelMiscMultisamplingMask = 0x1;
|
||||
static_assert(sizeof(Pixel) % 16u == 0, "D3D11 constant buffers must be multiples of 16 bytes");
|
||||
|
||||
struct Compute
|
||||
|
||||
Reference in New Issue
Block a user