Optimize compressed texture loads

LoadToNative already has a fast path that uses a single memcpy when the
input and output pitches align. LoadCompressedToNative now takes the
same single-memcpy fast path when its pitches align.

Bug: angleproject:8341
Change-Id: I4893f9ec26bb80d83593fc102990bd84c38bd12b
Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/4856674
Reviewed-by: Charlie Lao <cclao@google.com>
Commit-Queue: Shahbaz Youssefi <syoussefi@chromium.org>
Reviewed-by: Geoff Lang <geofflang@chromium.org>
This commit is contained in:
Shahbaz Youssefi
2023-09-11 22:38:32 -04:00
committed by Angle LUCI CQ
parent f57b0c3a53
commit d4e1493ca0

View File

@@ -173,13 +173,31 @@ inline void LoadCompressedToNative(const ImageLoadContext &context, size_t width
const size_t rows = (height + (blockHeight - 1)) / blockHeight;
const size_t layers = (depth + (blockDepth - 1)) / blockDepth;
for (size_t z = 0; z < layers; ++z)
const size_t inputLayerSize = inputRowPitch * rows;
const size_t inputImageSize = inputDepthPitch * layers;
const size_t outputLayerSize = outputRowPitch * rows;
const size_t outputImageSize = outputDepthPitch * layers;
if (inputImageSize == outputImageSize)
{
for (size_t y = 0; y < rows; ++y)
ASSERT(inputRowPitch == outputRowPitch);
ASSERT(inputLayerSize == outputLayerSize && inputLayerSize == inputDepthPitch && outputLayerSize == outputDepthPitch);
memcpy(output, input, inputImageSize);
}
else
{
// Note: this path should technically never be hit, but it is with the d3d backend. Once
// the issue is fixed, this path should be removed.
// http://anglebug.com/8345
for (size_t z = 0; z < layers; ++z)
{
const uint8_t *source = priv::OffsetDataPointer<uint8_t>(input, y, z, inputRowPitch, inputDepthPitch);
uint8_t *dest = priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
memcpy(dest, source, columns * blockSize);
for (size_t y = 0; y < rows; ++y)
{
const uint8_t *source = priv::OffsetDataPointer<uint8_t>(input, y, z, inputRowPitch, inputDepthPitch);
uint8_t *dest = priv::OffsetDataPointer<uint8_t>(output, y, z, outputRowPitch, outputDepthPitch);
memcpy(dest, source, columns * blockSize);
}
}
}
}