From d4e1493ca0501e743e055f21854605e8f39d29f8 Mon Sep 17 00:00:00 2001 From: Shahbaz Youssefi Date: Mon, 11 Sep 2023 22:38:32 -0400 Subject: [PATCH] Optimize compressed texture loads Similar to LoadToNative, which has a fast-path for when the pitches align (and a single memcpy is used), LoadCompressedToNative is made to use a single memcpy when the pitches align. Bug: angleproject:8341 Change-Id: I4893f9ec26bb80d83593fc102990bd84c38bd12b Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/4856674 Reviewed-by: Charlie Lao Commit-Queue: Shahbaz Youssefi Reviewed-by: Geoff Lang --- src/image_util/loadimage.inc | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/src/image_util/loadimage.inc b/src/image_util/loadimage.inc index 7f243dbf4..f937baec6 100644 --- a/src/image_util/loadimage.inc +++ b/src/image_util/loadimage.inc @@ -173,13 +173,31 @@ inline void LoadCompressedToNative(const ImageLoadContext &context, size_t width const size_t rows = (height + (blockHeight - 1)) / blockHeight; const size_t layers = (depth + (blockDepth - 1)) / blockDepth; - for (size_t z = 0; z < layers; ++z) + const size_t inputLayerSize = inputRowPitch * rows; + const size_t inputImageSize = inputDepthPitch * layers; + + const size_t outputLayerSize = outputRowPitch * rows; + const size_t outputImageSize = outputDepthPitch * layers; + + if (inputImageSize == outputImageSize) { - for (size_t y = 0; y < rows; ++y) + ASSERT(inputRowPitch == outputRowPitch); + ASSERT(inputLayerSize == outputLayerSize && inputLayerSize == inputDepthPitch && outputLayerSize == outputDepthPitch); + memcpy(output, input, inputImageSize); + } + else + { + // Note: this path should technically never be hit, but it is with the d3d backend. Once + // the issue is fixed, this path should be removed. + // http://anglebug.com/8345 + for (size_t z = 0; z < layers; ++z) { - const uint8_t *source = priv::OffsetDataPointer(input, y, z, inputRowPitch, inputDepthPitch); - uint8_t *dest = priv::OffsetDataPointer(output, y, z, outputRowPitch, outputDepthPitch); - memcpy(dest, source, columns * blockSize); + for (size_t y = 0; y < rows; ++y) + { + const uint8_t *source = priv::OffsetDataPointer(input, y, z, inputRowPitch, inputDepthPitch); + uint8_t *dest = priv::OffsetDataPointer(output, y, z, outputRowPitch, outputDepthPitch); + memcpy(dest, source, columns * blockSize); + } } } }