From 152cf62b38874238095a91307e4ea9bcdedf8f46 Mon Sep 17 00:00:00 2001 From: Charlie Lao Date: Tue, 15 Aug 2023 13:43:12 -0700 Subject: [PATCH] Tightly pack LinkedUniform by using int16_t There is a check of vector size when we link uniforms and the maximum vector size is 4096 due to we clamp the maxUniformBlockSize to 64KB. In reality, if we exceeds this number, program link will take really long time and then hit failure. So there is no real need to keep all the variables in 32 bit integer. This CL changes to 16 bit integer. Further, sh::BlockMemberInfo and ActiveVariable data members are embeded into LinkedUniform struct as well so that the unused variables can be removed and data can be tightly packed. This also makes LinkedUniform easier to maintain as a simple struct with basic data types. With this change, LinkedUniform size is reduced from 108 bytes down to 60 bytes, 48 bytes reduction. Given some apps has 200-ish uniforms, this CL reduces 48 bytes x 200 = ~9K memory just for uniforms per program (which goes through hash compute and decompression and file reads). Bug: b/275102061 Change-Id: I7fae20f5b75f3239305e2094a992e3040b8c8e4c Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/4754133 Reviewed-by: Shahbaz Youssefi Commit-Queue: Charlie Lao --- src/common/angleutils.h | 11 ++- src/libANGLE/Program.cpp | 2 +- src/libANGLE/ProgramExecutable.cpp | 23 +++--- src/libANGLE/Uniform.cpp | 96 ++++++++++++------------ src/libANGLE/Uniform.h | 95 +++++++++++++---------- src/libANGLE/queryutils.cpp | 8 +- src/libANGLE/renderer/d3d/ProgramD3D.cpp | 4 +- src/libANGLE/renderer/renderer_utils.h | 9 --- 8 files changed, 130 insertions(+), 118 deletions(-) diff --git a/src/common/angleutils.h b/src/common/angleutils.h index 529f4429c..8a6ecec6b 100644 --- a/src/common/angleutils.h +++ b/src/common/angleutils.h @@ -60,7 +60,7 @@ template > using HashMap = std::unordered_map; template , class KeyEqual = std::equal_to> -using HashSet = std::unordered_set; +using HashSet = std::unordered_set; # if __cpp_lib_generic_unordered_lookup >= 201811L # define ANGLE_HAS_HASH_MAP_GENERIC_LOOKUP 1 # else @@ -401,6 +401,15 @@ class ConditionalMutex final : angle::NonCopyable bool mUseMutex; }; +// Helper macro that casts to a bitfield type then verifies no bits were dropped. +#define SetBitField(lhs, rhs) \ + do \ + { \ + auto ANGLE_LOCAL_VAR = rhs; \ + lhs = static_cast::type>(ANGLE_LOCAL_VAR); \ + ASSERT(static_cast(lhs) == ANGLE_LOCAL_VAR); \ + } while (0) + // snprintf is not defined with MSVC prior to to msvc14 #if defined(_MSC_VER) && _MSC_VER < 1900 # define snprintf _snprintf diff --git a/src/libANGLE/Program.cpp b/src/libANGLE/Program.cpp index 843fac4da..37ebb5eeb 100644 --- a/src/libANGLE/Program.cpp +++ b/src/libANGLE/Program.cpp @@ -471,7 +471,7 @@ void LoadShaderVariableBuffer(BinaryInputStream *stream, ShaderVariableBuffer *v { var->memberIndexes.resize(numMembers); stream->readBytes(reinterpret_cast(var->memberIndexes.data()), - sizeof(unsigned int) * var->memberIndexes.size()); + sizeof(*var->memberIndexes.data()) * var->memberIndexes.size()); } } diff --git a/src/libANGLE/ProgramExecutable.cpp b/src/libANGLE/ProgramExecutable.cpp index 20574f871..570369e43 100644 --- a/src/libANGLE/ProgramExecutable.cpp +++ b/src/libANGLE/ProgramExecutable.cpp @@ -1658,9 +1658,9 @@ void ProgramExecutable::linkSamplerAndImageBindings(GLuint *combinedImageUniform { // The arrays of arrays are flattened to arrays, it needs to record the array offset for // the correct binding image unit. - mImageBindings.emplace_back(ImageBinding( - imageUniform.getBinding() + imageUniform.parentArrayIndex() * arraySize, - imageUniform.getBasicTypeElementCount(), textureType)); + mImageBindings.emplace_back( + ImageBinding(imageUniform.getBinding() + imageUniform.parentArrayIndex * arraySize, + imageUniform.getBasicTypeElementCount(), textureType)); } *combinedImageUniforms += imageUniform.activeShaderCount() * arraySize; @@ -1697,13 +1697,14 @@ bool ProgramExecutable::linkAtomicCounterBuffers(const Context *context, InfoLog { auto &uniform = mUniforms[index]; - uniform.blockInfo.offset = uniform.getOffset(); - uniform.blockInfo.arrayStride = uniform.isArray() ? 4 : 0; - uniform.blockInfo.matrixStride = 0; - uniform.blockInfo.isRowMajorMatrix = false; + uniform.blockOffset = uniform.getOffset(); + uniform.blockArrayStride = uniform.isArray() ? 4 : 0; + uniform.blockMatrixStride = 0; + uniform.flagBits.blockIsRowMajorMatrix = false; + uniform.flagBits.isBlock = true; bool found = false; - for (unsigned int bufferIndex = 0; bufferIndex < getActiveAtomicCounterBufferCount(); + for (uint16_t bufferIndex = 0; bufferIndex < getActiveAtomicCounterBufferCount(); ++bufferIndex) { auto &buffer = mAtomicCounterBuffers[bufferIndex]; @@ -1712,7 +1713,7 @@ bool ProgramExecutable::linkAtomicCounterBuffers(const Context *context, InfoLog buffer.memberIndexes.push_back(index); uniform.bufferIndex = bufferIndex; found = true; - buffer.unionReferencesWith(uniform.activeVariable); + buffer.unionReferencesWith(uniform); break; } } @@ -1721,9 +1722,9 @@ bool ProgramExecutable::linkAtomicCounterBuffers(const Context *context, InfoLog AtomicCounterBuffer atomicCounterBuffer; atomicCounterBuffer.binding = uniform.getBinding(); atomicCounterBuffer.memberIndexes.push_back(index); - atomicCounterBuffer.unionReferencesWith(uniform.activeVariable); + atomicCounterBuffer.unionReferencesWith(uniform); mAtomicCounterBuffers.push_back(atomicCounterBuffer); - uniform.bufferIndex = static_cast(getActiveAtomicCounterBufferCount() - 1); + uniform.bufferIndex = static_cast(getActiveAtomicCounterBufferCount() - 1); } } diff --git a/src/libANGLE/Uniform.cpp b/src/libANGLE/Uniform.cpp index 5a8e3869d..9e11741c0 100644 --- a/src/libANGLE/Uniform.cpp +++ b/src/libANGLE/Uniform.cpp @@ -30,26 +30,21 @@ void ActiveVariable::setActive(ShaderType shaderType, bool used, uint32_t id) mIds[shaderType] = id; } -void ActiveVariable::unionReferencesWith(const ActiveVariable &other) +void ActiveVariable::unionReferencesWith(const LinkedUniform &other) { mActiveUseBits |= other.mActiveUseBits; for (const ShaderType shaderType : AllShaderTypes()) { - ASSERT(mIds[shaderType] == 0 || other.mIds[shaderType] == 0 || - mIds[shaderType] == other.mIds[shaderType]); + ASSERT(mIds[shaderType] == 0 || other.getId(shaderType) == 0 || + mIds[shaderType] == other.getId(shaderType)); if (mIds[shaderType] == 0) { - mIds[shaderType] = other.mIds[shaderType]; + mIds[shaderType] = other.getId(shaderType); } } } -LinkedUniform::LinkedUniform() -{ - memset(this, 0, sizeof(*this)); - blockInfo = sh::BlockMemberInfo(); - activeVariable = ActiveVariable(); -} +LinkedUniform::LinkedUniform() {} LinkedUniform::LinkedUniform(GLenum typeIn, GLenum precisionIn, @@ -60,26 +55,27 @@ LinkedUniform::LinkedUniform(GLenum typeIn, const int bufferIndexIn, const sh::BlockMemberInfo &blockInfoIn) { - memset(this, 0, sizeof(*this)); - // Note: Ensure every data member is initialized. - type = typeIn; - precision = precisionIn; - imageUnitFormat = GL_NONE; - location = locationIn; - binding = bindingIn; - offset = offsetIn; - bufferIndex = bufferIndexIn; - blockInfo = blockInfoIn; - activeVariable = ActiveVariable(); - id = 0; - flattenedOffsetInParentArrays = -1; - outerArraySizeProduct = 1; - outerArrayOffset = 0; - arraySize = arraySizesIn.empty() ? 1 : arraySizesIn[0]; - - flagBitsAsUInt = 0; - flagBits.isArray = !arraySizesIn.empty(); + // arrays are always flattened, which means at most 1D array ASSERT(arraySizesIn.size() <= 1); + + memset(this, 0, sizeof(*this)); + SetBitField(type, typeIn); + SetBitField(precision, precisionIn); + location = locationIn; + SetBitField(binding, bindingIn); + SetBitField(offset, offsetIn); + SetBitField(bufferIndex, bufferIndexIn); + outerArraySizeProduct = 1; + SetBitField(arraySize, arraySizesIn.empty() ? 1u : arraySizesIn[0]); + SetBitField(flagBits.isArray, !arraySizesIn.empty()); + if (!(blockInfoIn == sh::kDefaultBlockMemberInfo)) + { + flagBits.isBlock = 1; + flagBits.blockIsRowMajorMatrix = blockInfoIn.isRowMajorMatrix; + SetBitField(blockOffset, blockInfoIn.offset); + SetBitField(blockArrayStride, blockInfoIn.arrayStride); + SetBitField(blockMatrixStride, blockInfoIn.matrixStride); + } } LinkedUniform::LinkedUniform(const LinkedUniform &other) @@ -89,32 +85,34 @@ LinkedUniform::LinkedUniform(const LinkedUniform &other) LinkedUniform::LinkedUniform(const UsedUniform &usedUniform) { - memset(this, 0, sizeof(*this)); - ASSERT(!usedUniform.isArrayOfArrays()); ASSERT(!usedUniform.isStruct()); ASSERT(usedUniform.active); + ASSERT(usedUniform.blockInfo == sh::kDefaultBlockMemberInfo); // Note: Ensure every data member is initialized. - type = usedUniform.type; - precision = usedUniform.precision; - imageUnitFormat = usedUniform.imageUnitFormat; - location = usedUniform.location; - binding = usedUniform.binding; - offset = usedUniform.offset; - bufferIndex = usedUniform.bufferIndex; - blockInfo = usedUniform.blockInfo; - activeVariable = usedUniform.activeVariable; - id = usedUniform.id; - flattenedOffsetInParentArrays = usedUniform.getFlattenedOffsetInParentArrays(); - outerArraySizeProduct = ArraySizeProduct(usedUniform.outerArraySizes); - outerArrayOffset = usedUniform.outerArrayOffset; - arraySize = usedUniform.isArray() ? usedUniform.getArraySizeProduct() : 1u; + flagBitsAsUByte = 0; + SetBitField(type, usedUniform.type); + SetBitField(precision, usedUniform.precision); + SetBitField(imageUnitFormat, usedUniform.imageUnitFormat); + location = usedUniform.location; + SetBitField(binding, usedUniform.binding); + SetBitField(offset, usedUniform.offset); - flagBitsAsUInt = 0; - flagBits.isFragmentInOut = usedUniform.isFragmentInOut; - flagBits.texelFetchStaticUse = usedUniform.texelFetchStaticUse; - flagBits.isArray = usedUniform.isArray(); + SetBitField(bufferIndex, usedUniform.bufferIndex); + SetBitField(parentArrayIndex, usedUniform.parentArrayIndex()); + SetBitField(outerArraySizeProduct, ArraySizeProduct(usedUniform.outerArraySizes)); + SetBitField(outerArrayOffset, usedUniform.outerArrayOffset); + SetBitField(arraySize, usedUniform.isArray() ? usedUniform.getArraySizeProduct() : 1u); + SetBitField(flagBits.isArray, usedUniform.isArray()); + + id = usedUniform.id; + mActiveUseBits = usedUniform.activeVariable.activeShaders(); + mIds = usedUniform.activeVariable.getIds(); + + SetBitField(flagBits.isFragmentInOut, usedUniform.isFragmentInOut); + SetBitField(flagBits.texelFetchStaticUse, usedUniform.texelFetchStaticUse); + ASSERT(!usedUniform.isArray() || arraySize == usedUniform.getArraySizeProduct()); } LinkedUniform::~LinkedUniform() {} diff --git a/src/libANGLE/Uniform.h b/src/libANGLE/Uniform.h index 7a5ff1e01..40fc82cf9 100644 --- a/src/libANGLE/Uniform.h +++ b/src/libANGLE/Uniform.h @@ -23,6 +23,7 @@ class BinaryInputStream; class BinaryOutputStream; struct UniformTypeInfo; struct UsedUniform; +struct LinkedUniform; // Note: keep this struct memcpy-able: i.e, a simple struct with basic types only and no virtual // functions. LinkedUniform relies on this so that it can use memcpy to initialize uniform for @@ -40,7 +41,7 @@ struct ActiveVariable return static_cast(ScanForward(mActiveUseBits.bits())); } void setActive(ShaderType shaderType, bool used, uint32_t id); - void unionReferencesWith(const ActiveVariable &other); + void unionReferencesWith(const LinkedUniform &otherUniform); bool isActive(ShaderType shaderType) const { ASSERT(shaderType != ShaderType::InvalidEnum); @@ -49,7 +50,6 @@ struct ActiveVariable const ShaderMap &getIds() const { return mIds; } uint32_t getId(ShaderType shaderType) const { return mIds[shaderType]; } ShaderBitSet activeShaders() const { return mActiveUseBits; } - GLuint activeShaderCount() const { return static_cast(mActiveUseBits.count()); } private: ShaderBitSet mActiveUseBits; @@ -62,6 +62,7 @@ struct ActiveVariable // not put any std::vector or objects with virtual functions in it. // Helper struct representing a single shader uniform. Most of this structure's data member and // access functions mirrors ShaderVariable; See ShaderVars.h for more info. +ANGLE_ENABLE_STRUCT_PADDING_WARNINGS struct LinkedUniform { LinkedUniform(); @@ -88,70 +89,82 @@ struct LinkedUniform bool isFragmentInOut() const { return flagBits.isFragmentInOut; } bool isArray() const { return flagBits.isArray; } - unsigned int getBasicTypeElementCount() const + uint16_t getBasicTypeElementCount() const { ASSERT(flagBits.isArray || arraySize == 1u); return arraySize; } GLenum getType() const { return type; } - unsigned int getOuterArrayOffset() const { return outerArrayOffset; } - unsigned int getOuterArraySizeProduct() const { return outerArraySizeProduct; } - int getBinding() const { return binding; } - int getOffset() const { return offset; } - const sh::BlockMemberInfo &getBlockInfo() const { return blockInfo; } + uint16_t getOuterArrayOffset() const { return outerArrayOffset; } + uint16_t getOuterArraySizeProduct() const { return outerArraySizeProduct; } + int16_t getBinding() const { return binding; } + int16_t getOffset() const { return offset; } int getBufferIndex() const { return bufferIndex; } int getLocation() const { return location; } GLenum getImageUnitFormat() const { return imageUnitFormat; } - int parentArrayIndex() const - { - return flattenedOffsetInParentArrays != -1 ? flattenedOffsetInParentArrays : 0; - } - ShaderType getFirstActiveShaderType() const { - return activeVariable.getFirstActiveShaderType(); + return static_cast(ScanForward(mActiveUseBits.bits())); } void setActive(ShaderType shaderType, bool used, uint32_t _id) { - activeVariable.setActive(shaderType, used, _id); + mActiveUseBits.set(shaderType, used); + mIds[shaderType] = id; } - bool isActive(ShaderType shaderType) const { return activeVariable.isActive(shaderType); } - const ShaderMap &getIds() const { return activeVariable.getIds(); } - uint32_t getId(ShaderType shaderType) const { return activeVariable.getId(shaderType); } - ShaderBitSet activeShaders() const { return activeVariable.activeShaders(); } - GLuint activeShaderCount() const { return activeVariable.activeShaderCount(); } + bool isActive(ShaderType shaderType) const { return mActiveUseBits[shaderType]; } + const ShaderMap &getIds() const { return mIds; } + uint32_t getId(ShaderType shaderType) const { return mIds[shaderType]; } + ShaderBitSet activeShaders() const { return mActiveUseBits; } + GLuint activeShaderCount() const { return static_cast(mActiveUseBits.count()); } - sh::BlockMemberInfo blockInfo; - ActiveVariable activeVariable; + uint16_t type; + uint16_t precision; - GLenum type; - GLenum precision; - GLenum imageUnitFormat; int location; - int binding; - int offset; - uint32_t id; - int flattenedOffsetInParentArrays; - int bufferIndex; - unsigned int outerArraySizeProduct; - unsigned int outerArrayOffset; - unsigned int arraySize; + // These are from sh::struct BlockMemberInfo struct. See locklayout.h for detail. + uint16_t blockOffset; + uint16_t blockArrayStride; + + uint16_t blockMatrixStride; + uint16_t imageUnitFormat; + + // maxUniformVectorsCount is 4K due to we clamp maxUniformBlockSize to 64KB. All of these + // variable should be enough to pack into 16 bits to reduce the size of mUniforms. + int16_t binding; + int16_t bufferIndex; + + int16_t offset; + uint16_t arraySize; + + uint16_t outerArraySizeProduct; + uint16_t outerArrayOffset; + + uint16_t parentArrayIndex; union { struct { - uint32_t isFragmentInOut : 1; - uint32_t texelFetchStaticUse : 1; - uint32_t isArray : 1; - uint32_t padding : 29; + uint8_t isFragmentInOut : 1; + uint8_t texelFetchStaticUse : 1; + uint8_t isArray : 1; + uint8_t blockIsRowMajorMatrix : 1; + uint8_t isBlock : 1; + uint8_t padding : 3; } flagBits; - - uint32_t flagBitsAsUInt; + uint8_t flagBitsAsUByte; }; + ShaderBitSet mActiveUseBits; + + uint32_t id; + + // The id of a linked variable in each shader stage. This id originates from + // sh::ShaderVariable::id or sh::InterfaceBlock::id + ShaderMap mIds; }; +ANGLE_DISABLE_STRUCT_PADDING_WARNINGS struct BufferVariable : public sh::ShaderVariable { @@ -195,9 +208,9 @@ struct ShaderVariableBuffer { activeVariable.setActive(shaderType, used, _id); } - void unionReferencesWith(const ActiveVariable &other) + void unionReferencesWith(const LinkedUniform &otherUniform) { - activeVariable.unionReferencesWith(other); + activeVariable.unionReferencesWith(otherUniform); } bool isActive(ShaderType shaderType) const { return activeVariable.isActive(shaderType); } const ShaderMap &getIds() const { return activeVariable.getIds(); } diff --git a/src/libANGLE/queryutils.cpp b/src/libANGLE/queryutils.cpp index 525b61110..4c782a7b8 100644 --- a/src/libANGLE/queryutils.cpp +++ b/src/libANGLE/queryutils.cpp @@ -1925,16 +1925,16 @@ GLint GetUniformResourceProperty(const Program *program, GLuint index, const GLe return (uniform.isAtomicCounter() ? -1 : uniform.getBufferIndex()); case GL_OFFSET: - return uniform.getBlockInfo().offset; + return uniform.flagBits.isBlock ? uniform.blockOffset : -1; case GL_ARRAY_STRIDE: - return uniform.getBlockInfo().arrayStride; + return uniform.flagBits.isBlock ? uniform.blockArrayStride : -1; case GL_MATRIX_STRIDE: - return uniform.getBlockInfo().matrixStride; + return uniform.flagBits.isBlock ? uniform.blockMatrixStride : -1; case GL_IS_ROW_MAJOR: - return static_cast(uniform.getBlockInfo().isRowMajorMatrix); + return uniform.flagBits.blockIsRowMajorMatrix ? 1 : 0; case GL_REFERENCED_BY_VERTEX_SHADER: return uniform.isActive(ShaderType::Vertex); diff --git a/src/libANGLE/renderer/d3d/ProgramD3D.cpp b/src/libANGLE/renderer/d3d/ProgramD3D.cpp index aa9f4714d..c8e98bcf7 100644 --- a/src/libANGLE/renderer/d3d/ProgramD3D.cpp +++ b/src/libANGLE/renderer/d3d/ProgramD3D.cpp @@ -2903,7 +2903,7 @@ void ProgramD3D::assignSamplerRegisters(size_t uniformIndex) std::vector subscripts; const std::string baseName = gl::ParseResourceName(d3dUniform->name, &subscripts); unsigned int registerOffset = - mState.getUniforms()[uniformIndex].parentArrayIndex() * d3dUniform->getArraySizeProduct(); + mState.getUniforms()[uniformIndex].parentArrayIndex * d3dUniform->getArraySizeProduct(); bool hasUniform = false; for (gl::ShaderType shaderType : gl::AllShaderTypes()) @@ -3005,7 +3005,7 @@ void ProgramD3D::assignImageRegisters(size_t uniformIndex) std::vector subscripts; const std::string baseName = gl::ParseResourceName(d3dUniform->name, &subscripts); unsigned int registerOffset = - mState.getUniforms()[uniformIndex].parentArrayIndex() * d3dUniform->getArraySizeProduct(); + mState.getUniforms()[uniformIndex].parentArrayIndex * d3dUniform->getArraySizeProduct(); const gl::Shader *computeShader = mState.getAttachedShader(gl::ShaderType::Compute); if (computeShader) diff --git a/src/libANGLE/renderer/renderer_utils.h b/src/libANGLE/renderer/renderer_utils.h index 65e3c3c51..c795c3c8c 100644 --- a/src/libANGLE/renderer/renderer_utils.h +++ b/src/libANGLE/renderer/renderer_utils.h @@ -494,13 +494,4 @@ enum class PipelineType #define ANGLE_MARK_TRANSFORM_FEEDBACK_USAGE(instanced) \ ANGLE_MARK_TRANSFORM_FEEDBACK_USAGE##instanced -// Helper macro that casts to a bitfield type then verifies no bits were dropped. -#define SetBitField(lhs, rhs) \ - do \ - { \ - auto ANGLE_LOCAL_VAR = rhs; \ - lhs = static_cast::type>(ANGLE_LOCAL_VAR); \ - ASSERT(static_cast(lhs) == ANGLE_LOCAL_VAR); \ - } while (0) - #endif // LIBANGLE_RENDERER_RENDERER_UTILS_H_