Revert "Tightly pack LinkedUniform by using int16_t"

This reverts commit 152cf62b38.

Reason for revert: Suspected cause of failures in several Linux MSan Tests, e.g.
https://ci.chromium.org/ui/p/chromium/builders/ci/Linux%20MSan%20Tests/42403/overview
https://ci.chromium.org/ui/p/chromium/builders/ci/WebKit%20Linux%20MSAN/22174/overview
https://ci.chromium.org/ui/p/chromium/builders/ci/Linux%20MSan%20Tests/42403/overview

Original change's description:
> Tightly pack LinkedUniform by using int16_t
>
> There is a check of vector size when we link uniforms, and the maximum
> vector size is 4096 because we clamp maxUniformBlockSize to 64KB. In
> reality, if we exceed this number, program link will take a really long
> time and then fail. So there is no real need to keep all the
> variables as 32-bit integers. This CL changes them to 16-bit integers. Further,
> sh::BlockMemberInfo and ActiveVariable data members are embedded into the
> LinkedUniform struct as well so that the unused variables can be removed
> and data can be tightly packed. This also makes LinkedUniform easier to
> maintain as a simple struct with basic data types. With this change,
> LinkedUniform size is reduced from 108 bytes down to 60 bytes, a 48-byte
> reduction. Given that some apps have 200-ish uniforms, this CL saves 48
> bytes x 200 = ~9K of memory just for uniforms per program (which goes
> through hash compute and decompression and file reads).
>
> Bug: b/275102061
> Change-Id: I7fae20f5b75f3239305e2094a992e3040b8c8e4c
> Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/4754133
> Reviewed-by: Shahbaz Youssefi <syoussefi@chromium.org>
> Commit-Queue: Charlie Lao <cclao@google.com>

Bug: b/275102061
Change-Id: Id344e306307553731097f06edafc40bfeb73ff80
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/4780494
Reviewed-by: Geoff Lang <geofflang@chromium.org>
Commit-Queue: Geoff Lang <geofflang@chromium.org>
This commit is contained in:
Vivian Pao
2023-08-16 06:38:15 +00:00
committed by Angle LUCI CQ
parent 5d2a1749ce
commit 9f48f9314e
8 changed files with 118 additions and 130 deletions

View File

@@ -60,7 +60,7 @@ template <typename Key,
class KeyEqual = std::equal_to<Key>>
using HashMap = std::unordered_map<Key, T, Hash, KeyEqual>;
template <typename Key, class Hash = std::hash<Key>, class KeyEqual = std::equal_to<Key>>
using HashSet = std::unordered_set<Key, Hash, KeyEqual>;
using HashSet = std::unordered_set<Key, Hash, KeyEqual>;
# if __cpp_lib_generic_unordered_lookup >= 201811L
# define ANGLE_HAS_HASH_MAP_GENERIC_LOOKUP 1
# else
@@ -401,15 +401,6 @@ class ConditionalMutex final : angle::NonCopyable
bool mUseMutex;
};
// Helper macro that casts to a bitfield type then verifies no bits were dropped.
#define SetBitField(lhs, rhs) \
do \
{ \
auto ANGLE_LOCAL_VAR = rhs; \
lhs = static_cast<typename std::decay<decltype(lhs)>::type>(ANGLE_LOCAL_VAR); \
ASSERT(static_cast<decltype(ANGLE_LOCAL_VAR)>(lhs) == ANGLE_LOCAL_VAR); \
} while (0)
// snprintf is not defined with MSVC prior to msvc14
#if defined(_MSC_VER) && _MSC_VER < 1900
# define snprintf _snprintf

View File

@@ -471,7 +471,7 @@ void LoadShaderVariableBuffer(BinaryInputStream *stream, ShaderVariableBuffer *v
{
var->memberIndexes.resize(numMembers);
stream->readBytes(reinterpret_cast<unsigned char *>(var->memberIndexes.data()),
sizeof(*var->memberIndexes.data()) * var->memberIndexes.size());
sizeof(unsigned int) * var->memberIndexes.size());
}
}

View File

@@ -1658,9 +1658,9 @@ void ProgramExecutable::linkSamplerAndImageBindings(GLuint *combinedImageUniform
{
// The arrays of arrays are flattened to arrays, it needs to record the array offset for
// the correct binding image unit.
mImageBindings.emplace_back(
ImageBinding(imageUniform.getBinding() + imageUniform.parentArrayIndex * arraySize,
imageUniform.getBasicTypeElementCount(), textureType));
mImageBindings.emplace_back(ImageBinding(
imageUniform.getBinding() + imageUniform.parentArrayIndex() * arraySize,
imageUniform.getBasicTypeElementCount(), textureType));
}
*combinedImageUniforms += imageUniform.activeShaderCount() * arraySize;
@@ -1697,14 +1697,13 @@ bool ProgramExecutable::linkAtomicCounterBuffers(const Context *context, InfoLog
{
auto &uniform = mUniforms[index];
uniform.blockOffset = uniform.getOffset();
uniform.blockArrayStride = uniform.isArray() ? 4 : 0;
uniform.blockMatrixStride = 0;
uniform.flagBits.blockIsRowMajorMatrix = false;
uniform.flagBits.isBlock = true;
uniform.blockInfo.offset = uniform.getOffset();
uniform.blockInfo.arrayStride = uniform.isArray() ? 4 : 0;
uniform.blockInfo.matrixStride = 0;
uniform.blockInfo.isRowMajorMatrix = false;
bool found = false;
for (uint16_t bufferIndex = 0; bufferIndex < getActiveAtomicCounterBufferCount();
for (unsigned int bufferIndex = 0; bufferIndex < getActiveAtomicCounterBufferCount();
++bufferIndex)
{
auto &buffer = mAtomicCounterBuffers[bufferIndex];
@@ -1713,7 +1712,7 @@ bool ProgramExecutable::linkAtomicCounterBuffers(const Context *context, InfoLog
buffer.memberIndexes.push_back(index);
uniform.bufferIndex = bufferIndex;
found = true;
buffer.unionReferencesWith(uniform);
buffer.unionReferencesWith(uniform.activeVariable);
break;
}
}
@@ -1722,9 +1721,9 @@ bool ProgramExecutable::linkAtomicCounterBuffers(const Context *context, InfoLog
AtomicCounterBuffer atomicCounterBuffer;
atomicCounterBuffer.binding = uniform.getBinding();
atomicCounterBuffer.memberIndexes.push_back(index);
atomicCounterBuffer.unionReferencesWith(uniform);
atomicCounterBuffer.unionReferencesWith(uniform.activeVariable);
mAtomicCounterBuffers.push_back(atomicCounterBuffer);
uniform.bufferIndex = static_cast<uint16_t>(getActiveAtomicCounterBufferCount() - 1);
uniform.bufferIndex = static_cast<int>(getActiveAtomicCounterBufferCount() - 1);
}
}

View File

@@ -30,21 +30,26 @@ void ActiveVariable::setActive(ShaderType shaderType, bool used, uint32_t id)
mIds[shaderType] = id;
}
void ActiveVariable::unionReferencesWith(const LinkedUniform &other)
void ActiveVariable::unionReferencesWith(const ActiveVariable &other)
{
mActiveUseBits |= other.mActiveUseBits;
for (const ShaderType shaderType : AllShaderTypes())
{
ASSERT(mIds[shaderType] == 0 || other.getId(shaderType) == 0 ||
mIds[shaderType] == other.getId(shaderType));
ASSERT(mIds[shaderType] == 0 || other.mIds[shaderType] == 0 ||
mIds[shaderType] == other.mIds[shaderType]);
if (mIds[shaderType] == 0)
{
mIds[shaderType] = other.getId(shaderType);
mIds[shaderType] = other.mIds[shaderType];
}
}
}
LinkedUniform::LinkedUniform() {}
LinkedUniform::LinkedUniform()
{
memset(this, 0, sizeof(*this));
blockInfo = sh::BlockMemberInfo();
activeVariable = ActiveVariable();
}
LinkedUniform::LinkedUniform(GLenum typeIn,
GLenum precisionIn,
@@ -55,27 +60,26 @@ LinkedUniform::LinkedUniform(GLenum typeIn,
const int bufferIndexIn,
const sh::BlockMemberInfo &blockInfoIn)
{
// arrays are always flattened, which means at most 1D array
ASSERT(arraySizesIn.size() <= 1);
memset(this, 0, sizeof(*this));
SetBitField(type, typeIn);
SetBitField(precision, precisionIn);
location = locationIn;
SetBitField(binding, bindingIn);
SetBitField(offset, offsetIn);
SetBitField(bufferIndex, bufferIndexIn);
outerArraySizeProduct = 1;
SetBitField(arraySize, arraySizesIn.empty() ? 1u : arraySizesIn[0]);
SetBitField(flagBits.isArray, !arraySizesIn.empty());
if (!(blockInfoIn == sh::kDefaultBlockMemberInfo))
{
flagBits.isBlock = 1;
flagBits.blockIsRowMajorMatrix = blockInfoIn.isRowMajorMatrix;
SetBitField(blockOffset, blockInfoIn.offset);
SetBitField(blockArrayStride, blockInfoIn.arrayStride);
SetBitField(blockMatrixStride, blockInfoIn.matrixStride);
}
// Note: Ensure every data member is initialized.
type = typeIn;
precision = precisionIn;
imageUnitFormat = GL_NONE;
location = locationIn;
binding = bindingIn;
offset = offsetIn;
bufferIndex = bufferIndexIn;
blockInfo = blockInfoIn;
activeVariable = ActiveVariable();
id = 0;
flattenedOffsetInParentArrays = -1;
outerArraySizeProduct = 1;
outerArrayOffset = 0;
arraySize = arraySizesIn.empty() ? 1 : arraySizesIn[0];
flagBitsAsUInt = 0;
flagBits.isArray = !arraySizesIn.empty();
ASSERT(arraySizesIn.size() <= 1);
}
LinkedUniform::LinkedUniform(const LinkedUniform &other)
@@ -85,34 +89,32 @@ LinkedUniform::LinkedUniform(const LinkedUniform &other)
LinkedUniform::LinkedUniform(const UsedUniform &usedUniform)
{
memset(this, 0, sizeof(*this));
ASSERT(!usedUniform.isArrayOfArrays());
ASSERT(!usedUniform.isStruct());
ASSERT(usedUniform.active);
ASSERT(usedUniform.blockInfo == sh::kDefaultBlockMemberInfo);
// Note: Ensure every data member is initialized.
flagBitsAsUByte = 0;
SetBitField(type, usedUniform.type);
SetBitField(precision, usedUniform.precision);
SetBitField(imageUnitFormat, usedUniform.imageUnitFormat);
location = usedUniform.location;
SetBitField(binding, usedUniform.binding);
SetBitField(offset, usedUniform.offset);
type = usedUniform.type;
precision = usedUniform.precision;
imageUnitFormat = usedUniform.imageUnitFormat;
location = usedUniform.location;
binding = usedUniform.binding;
offset = usedUniform.offset;
bufferIndex = usedUniform.bufferIndex;
blockInfo = usedUniform.blockInfo;
activeVariable = usedUniform.activeVariable;
id = usedUniform.id;
flattenedOffsetInParentArrays = usedUniform.getFlattenedOffsetInParentArrays();
outerArraySizeProduct = ArraySizeProduct(usedUniform.outerArraySizes);
outerArrayOffset = usedUniform.outerArrayOffset;
arraySize = usedUniform.isArray() ? usedUniform.getArraySizeProduct() : 1u;
SetBitField(bufferIndex, usedUniform.bufferIndex);
SetBitField(parentArrayIndex, usedUniform.parentArrayIndex());
SetBitField(outerArraySizeProduct, ArraySizeProduct(usedUniform.outerArraySizes));
SetBitField(outerArrayOffset, usedUniform.outerArrayOffset);
SetBitField(arraySize, usedUniform.isArray() ? usedUniform.getArraySizeProduct() : 1u);
SetBitField(flagBits.isArray, usedUniform.isArray());
id = usedUniform.id;
mActiveUseBits = usedUniform.activeVariable.activeShaders();
mIds = usedUniform.activeVariable.getIds();
SetBitField(flagBits.isFragmentInOut, usedUniform.isFragmentInOut);
SetBitField(flagBits.texelFetchStaticUse, usedUniform.texelFetchStaticUse);
ASSERT(!usedUniform.isArray() || arraySize == usedUniform.getArraySizeProduct());
flagBitsAsUInt = 0;
flagBits.isFragmentInOut = usedUniform.isFragmentInOut;
flagBits.texelFetchStaticUse = usedUniform.texelFetchStaticUse;
flagBits.isArray = usedUniform.isArray();
}
LinkedUniform::~LinkedUniform() {}

View File

@@ -23,7 +23,6 @@ class BinaryInputStream;
class BinaryOutputStream;
struct UniformTypeInfo;
struct UsedUniform;
struct LinkedUniform;
// Note: keep this struct memcpy-able: i.e, a simple struct with basic types only and no virtual
// functions. LinkedUniform relies on this so that it can use memcpy to initialize uniform for
@@ -41,7 +40,7 @@ struct ActiveVariable
return static_cast<ShaderType>(ScanForward(mActiveUseBits.bits()));
}
void setActive(ShaderType shaderType, bool used, uint32_t id);
void unionReferencesWith(const LinkedUniform &otherUniform);
void unionReferencesWith(const ActiveVariable &other);
bool isActive(ShaderType shaderType) const
{
ASSERT(shaderType != ShaderType::InvalidEnum);
@@ -50,6 +49,7 @@ struct ActiveVariable
const ShaderMap<uint32_t> &getIds() const { return mIds; }
uint32_t getId(ShaderType shaderType) const { return mIds[shaderType]; }
ShaderBitSet activeShaders() const { return mActiveUseBits; }
GLuint activeShaderCount() const { return static_cast<GLuint>(mActiveUseBits.count()); }
private:
ShaderBitSet mActiveUseBits;
@@ -62,7 +62,6 @@ struct ActiveVariable
// not put any std::vector or objects with virtual functions in it.
// Helper struct representing a single shader uniform. Most of this structure's data member and
// access functions mirrors ShaderVariable; See ShaderVars.h for more info.
ANGLE_ENABLE_STRUCT_PADDING_WARNINGS
struct LinkedUniform
{
LinkedUniform();
@@ -89,82 +88,70 @@ struct LinkedUniform
bool isFragmentInOut() const { return flagBits.isFragmentInOut; }
bool isArray() const { return flagBits.isArray; }
uint16_t getBasicTypeElementCount() const
unsigned int getBasicTypeElementCount() const
{
ASSERT(flagBits.isArray || arraySize == 1u);
return arraySize;
}
GLenum getType() const { return type; }
uint16_t getOuterArrayOffset() const { return outerArrayOffset; }
uint16_t getOuterArraySizeProduct() const { return outerArraySizeProduct; }
int16_t getBinding() const { return binding; }
int16_t getOffset() const { return offset; }
unsigned int getOuterArrayOffset() const { return outerArrayOffset; }
unsigned int getOuterArraySizeProduct() const { return outerArraySizeProduct; }
int getBinding() const { return binding; }
int getOffset() const { return offset; }
const sh::BlockMemberInfo &getBlockInfo() const { return blockInfo; }
int getBufferIndex() const { return bufferIndex; }
int getLocation() const { return location; }
GLenum getImageUnitFormat() const { return imageUnitFormat; }
int parentArrayIndex() const
{
return flattenedOffsetInParentArrays != -1 ? flattenedOffsetInParentArrays : 0;
}
ShaderType getFirstActiveShaderType() const
{
return static_cast<ShaderType>(ScanForward(mActiveUseBits.bits()));
return activeVariable.getFirstActiveShaderType();
}
void setActive(ShaderType shaderType, bool used, uint32_t _id)
{
mActiveUseBits.set(shaderType, used);
mIds[shaderType] = id;
activeVariable.setActive(shaderType, used, _id);
}
bool isActive(ShaderType shaderType) const { return mActiveUseBits[shaderType]; }
const ShaderMap<uint32_t> &getIds() const { return mIds; }
uint32_t getId(ShaderType shaderType) const { return mIds[shaderType]; }
ShaderBitSet activeShaders() const { return mActiveUseBits; }
GLuint activeShaderCount() const { return static_cast<GLuint>(mActiveUseBits.count()); }
bool isActive(ShaderType shaderType) const { return activeVariable.isActive(shaderType); }
const ShaderMap<uint32_t> &getIds() const { return activeVariable.getIds(); }
uint32_t getId(ShaderType shaderType) const { return activeVariable.getId(shaderType); }
ShaderBitSet activeShaders() const { return activeVariable.activeShaders(); }
GLuint activeShaderCount() const { return activeVariable.activeShaderCount(); }
uint16_t type;
uint16_t precision;
sh::BlockMemberInfo blockInfo;
ActiveVariable activeVariable;
GLenum type;
GLenum precision;
GLenum imageUnitFormat;
int location;
int binding;
int offset;
uint32_t id;
int flattenedOffsetInParentArrays;
int bufferIndex;
unsigned int outerArraySizeProduct;
unsigned int outerArrayOffset;
unsigned int arraySize;
// These are from the sh::BlockMemberInfo struct. See blocklayout.h for detail.
uint16_t blockOffset;
uint16_t blockArrayStride;
uint16_t blockMatrixStride;
uint16_t imageUnitFormat;
// maxUniformVectorsCount is 4K because we clamp maxUniformBlockSize to 64KB. All of these
// variables should fit into 16 bits, which reduces the size of mUniforms.
int16_t binding;
int16_t bufferIndex;
int16_t offset;
uint16_t arraySize;
uint16_t outerArraySizeProduct;
uint16_t outerArrayOffset;
uint16_t parentArrayIndex;
union
{
struct
{
uint8_t isFragmentInOut : 1;
uint8_t texelFetchStaticUse : 1;
uint8_t isArray : 1;
uint8_t blockIsRowMajorMatrix : 1;
uint8_t isBlock : 1;
uint8_t padding : 3;
uint32_t isFragmentInOut : 1;
uint32_t texelFetchStaticUse : 1;
uint32_t isArray : 1;
uint32_t padding : 29;
} flagBits;
uint8_t flagBitsAsUByte;
uint32_t flagBitsAsUInt;
};
ShaderBitSet mActiveUseBits;
uint32_t id;
// The id of a linked variable in each shader stage. This id originates from
// sh::ShaderVariable::id or sh::InterfaceBlock::id
ShaderMap<uint32_t> mIds;
};
ANGLE_DISABLE_STRUCT_PADDING_WARNINGS
struct BufferVariable : public sh::ShaderVariable
{
@@ -208,9 +195,9 @@ struct ShaderVariableBuffer
{
activeVariable.setActive(shaderType, used, _id);
}
void unionReferencesWith(const LinkedUniform &otherUniform)
void unionReferencesWith(const ActiveVariable &other)
{
activeVariable.unionReferencesWith(otherUniform);
activeVariable.unionReferencesWith(other);
}
bool isActive(ShaderType shaderType) const { return activeVariable.isActive(shaderType); }
const ShaderMap<uint32_t> &getIds() const { return activeVariable.getIds(); }

View File

@@ -1925,16 +1925,16 @@ GLint GetUniformResourceProperty(const Program *program, GLuint index, const GLe
return (uniform.isAtomicCounter() ? -1 : uniform.getBufferIndex());
case GL_OFFSET:
return uniform.flagBits.isBlock ? uniform.blockOffset : -1;
return uniform.getBlockInfo().offset;
case GL_ARRAY_STRIDE:
return uniform.flagBits.isBlock ? uniform.blockArrayStride : -1;
return uniform.getBlockInfo().arrayStride;
case GL_MATRIX_STRIDE:
return uniform.flagBits.isBlock ? uniform.blockMatrixStride : -1;
return uniform.getBlockInfo().matrixStride;
case GL_IS_ROW_MAJOR:
return uniform.flagBits.blockIsRowMajorMatrix ? 1 : 0;
return static_cast<GLint>(uniform.getBlockInfo().isRowMajorMatrix);
case GL_REFERENCED_BY_VERTEX_SHADER:
return uniform.isActive(ShaderType::Vertex);

View File

@@ -2903,7 +2903,7 @@ void ProgramD3D::assignSamplerRegisters(size_t uniformIndex)
std::vector<unsigned int> subscripts;
const std::string baseName = gl::ParseResourceName(d3dUniform->name, &subscripts);
unsigned int registerOffset =
mState.getUniforms()[uniformIndex].parentArrayIndex * d3dUniform->getArraySizeProduct();
mState.getUniforms()[uniformIndex].parentArrayIndex() * d3dUniform->getArraySizeProduct();
bool hasUniform = false;
for (gl::ShaderType shaderType : gl::AllShaderTypes())
@@ -3005,7 +3005,7 @@ void ProgramD3D::assignImageRegisters(size_t uniformIndex)
std::vector<unsigned int> subscripts;
const std::string baseName = gl::ParseResourceName(d3dUniform->name, &subscripts);
unsigned int registerOffset =
mState.getUniforms()[uniformIndex].parentArrayIndex * d3dUniform->getArraySizeProduct();
mState.getUniforms()[uniformIndex].parentArrayIndex() * d3dUniform->getArraySizeProduct();
const gl::Shader *computeShader = mState.getAttachedShader(gl::ShaderType::Compute);
if (computeShader)

View File

@@ -494,4 +494,13 @@ enum class PipelineType
#define ANGLE_MARK_TRANSFORM_FEEDBACK_USAGE(instanced) \
ANGLE_MARK_TRANSFORM_FEEDBACK_USAGE##instanced
// Helper macro that casts to a bitfield type then verifies no bits were dropped.
#define SetBitField(lhs, rhs) \
do \
{ \
auto ANGLE_LOCAL_VAR = rhs; \
lhs = static_cast<typename std::decay<decltype(lhs)>::type>(ANGLE_LOCAL_VAR); \
ASSERT(static_cast<decltype(ANGLE_LOCAL_VAR)>(lhs) == ANGLE_LOCAL_VAR); \
} while (0)
#endif // LIBANGLE_RENDERER_RENDERER_UTILS_H_