Metal: Compile default shader source files separately.

- Previously all default shader files were included in one file named
  master_source.metal.
- Now they are compiled separately and linked together to generate the
  binary shader library.
- Also generate a debug binary variant that will be used in debug builds.

Bug: angleproject:2634
Change-Id: Ic040835c4a729666e73afeba52f5be839b632396
Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/2281784
Commit-Queue: Jamie Madill <jmadill@chromium.org>
Reviewed-by: Jonah Ryan-Davis <jonahr@google.com>
Reviewed-by: Jamie Madill <jmadill@chromium.org>
This commit is contained in:
Le Hoang Quyen
2020-07-06 01:18:22 +08:00
committed by Commit Bot
parent 9277ee7413
commit 3e0b61a986
10 changed files with 27869 additions and 10536 deletions

View File

@@ -1,20 +1,16 @@
{
"src/libANGLE/renderer/metal/shaders/blit.metal":
"1a12b22f56799bd38cf1c6b301b720ee",
"8a00929424273e07e4b63d4ab062f005",
"src/libANGLE/renderer/metal/shaders/clear.metal":
"1c231afc6100433a79fce49046aa5965",
"src/libANGLE/renderer/metal/shaders/common.h":
"7330bd3f7ab21214e4fe16bc526749bb",
"src/libANGLE/renderer/metal/shaders/compiled/mtl_default_shaders.inc":
"445538e99fb3679c3c8436f565911c69",
"7f4fe9c0ee3fa9bee84ea6627df44e04",
"src/libANGLE/renderer/metal/shaders/compiled/mtl_default_shaders_autogen.inc":
"e244973f2fada4861373fbbf36be5d33",
"src/libANGLE/renderer/metal/shaders/constants.h":
"9bb6e63bf2b48a7a56978c787bde4850",
"src/libANGLE/renderer/metal/shaders/gen_indices.metal":
"002511e2b980a7fca7e80cbda6a82712",
"src/libANGLE/renderer/metal/shaders/gen_mtl_internal_shaders.py":
"0e599fb113dbc3f714291383d85c39c2",
"src/libANGLE/renderer/metal/shaders/master_source.metal":
"fbe6f4bfb49a48ae87791a4cff5fab0a",
"src/libANGLE/renderer/metal/shaders/mtl_default_shaders_src_autogen.inc":
"ee7ff414da20e7b84f1187d2bea1e84d"
"a17189e67fe112942926902691de1539"
}

View File

@@ -54,9 +54,8 @@ _metal_backend_sources = [
"mtl_state_cache.mm",
"mtl_utils.h",
"mtl_utils.mm",
"shaders/compiled/mtl_default_shaders.inc",
"shaders/compiled/mtl_default_shaders_autogen.inc",
"shaders/constants.h",
"shaders/mtl_default_shaders_src_autogen.inc",
]
config("angle_metal_backend_config") {

View File

@@ -15,8 +15,7 @@
#include "libANGLE/renderer/metal/ContextMtl.h"
#include "libANGLE/renderer/metal/SurfaceMtl.h"
#include "libANGLE/renderer/metal/mtl_common.h"
#include "libANGLE/renderer/metal/shaders/compiled/mtl_default_shaders.inc"
#include "libANGLE/renderer/metal/shaders/mtl_default_shaders_src_autogen.inc"
#include "libANGLE/renderer/metal/shaders/compiled/mtl_default_shaders_autogen.inc"
#include "platform/Platform.h"
#include "EGL/eglext.h"
@@ -666,14 +665,20 @@ angle::Result DisplayMtl::initializeShaderLibrary()
{
mtl::AutoObjCObj<NSError> err = nil;
#if defined(ANGLE_MTL_DEBUG_INTERNAL_SHADERS)
mDefaultShaders = CreateShaderLibrary(getMetalDevice(), default_metallib_src,
sizeof(default_metallib_src), &err);
const uint8_t *compiled_shader_binary;
size_t compiled_shader_binary_len;
#if !defined(NDEBUG)
compiled_shader_binary = compiled_default_metallib_debug;
compiled_shader_binary_len = compiled_default_metallib_debug_len;
#else
mDefaultShaders = CreateShaderLibraryFromBinary(getMetalDevice(), compiled_default_metallib,
compiled_default_metallib_len, &err);
compiled_shader_binary = compiled_default_metallib;
compiled_shader_binary_len = compiled_default_metallib_len;
#endif
mDefaultShaders = CreateShaderLibraryFromBinary(getMetalDevice(), compiled_shader_binary,
compiled_shader_binary_len, &err);
if (err && !mDefaultShaders)
{
ANGLE_MTL_OBJC_SCOPE

View File

@@ -9,10 +9,9 @@
using namespace rx::mtl_shader;
// function_constant(0-3) is already used by gen_indices.metal
constant bool kPremultiplyAlpha [[function_constant(4)]];
constant bool kUnmultiplyAlpha [[function_constant(5)]];
constant int kSourceTextureType [[function_constant(6)]]; // Source texture type.
constant bool kPremultiplyAlpha [[function_constant(1)]];
constant bool kUnmultiplyAlpha [[function_constant(2)]];
constant int kSourceTextureType [[function_constant(3)]]; // Source texture type.
constant bool kSourceTextureType2D = kSourceTextureType == kTextureType2D;
constant bool kSourceTextureType2DArray = kSourceTextureType == kTextureType2DArray;
constant bool kSourceTextureType2DMS = kSourceTextureType == kTextureType2DMultisample;

View File

@@ -26,8 +26,4 @@ using namespace metal;
// Full screen triangle's vertices
constant float2 gCorners[3] = {float2(-1.0f, -1.0f), float2(3.0f, -1.0f), float2(-1.0f, 3.0f)};
fragment float4 dummyFS()
{
return float4(0, 0, 0, 0);
}
#endif /* LIBANGLE_RENDERER_METAL_SHADERS_COMMON_H_ */

File diff suppressed because it is too large Load Diff

View File

@@ -22,14 +22,125 @@ template_header_boilerplate = """// GENERATED FILE - DO NOT EDIT.
"""
# Convert content of a file to byte array and store in a header file.
# variable_name: name of C++ variable that will hold the file content as byte array.
# filename: the file whose content will be converted to C++ byte array.
# dest_src_file: destination header file that will contain the byte array.
def append_file_as_byte_array_string(variable_name, filename, dest_src_file):
    """Append the bytes of `filename` to `dest_src_file` as a C++ array definition.

    The emitted text is a `// Generated from <filename>:` comment followed by
    `constexpr unsigned char <variable_name>[] = { ... };` with twelve
    `0xNN,` entries per line. `dest_src_file` is opened in append mode, so
    existing content is kept.
    """
    # bytearray iterates as ints on both Python 2 and Python 3, so no ord() is
    # needed (ord() on an int raises TypeError under Python 3). The `with`
    # block also makes sure the input handle is closed.
    with open(filename, "rb") as in_file:
        data = bytearray(in_file.read())

    # Collect the pieces in a list and join once; repeated string += is
    # quadratic for large binaries.
    pieces = [
        '// Generated from {0}:\n'.format(filename),
        'constexpr\nunsigned char {0}[] = {{\n'.format(variable_name),
    ]
    for index, value in enumerate(data):
        column = index % 12
        if column == 0:
            # Leading indent at the start of every output row.
            pieces.append(" ")
        pieces.append('0x{:02x},'.format(value))
        # The twelfth entry ends the row; every other entry is followed by a space.
        pieces.append("\n" if column == 11 else " ")
    pieces.append("\n};\n")

    with open(dest_src_file, "a") as out_file:
        out_file.write(''.join(pieces))
# Compile every file in src_files to an .air object with the given SDK, then
# link the objects into compiled/default.<variant>.metallib.
# sdk: value passed to `xcrun -sdk`.
# version_flag: minimum-OS-version compiler flag including its trailing space,
#               or '' when no such flag is passed.
# variant: string used in the names of the intermediate and output files.
def _build_metallib(sdk, version_flag, variant, additional_flags, src_files):
    object_files = ''
    for src_file in src_files:
        object_file = 'compiled/default.{0}.{1}.air'.format(variant, src_file)
        object_files += ' ' + object_file
        os.system('xcrun -sdk {0} metal {1}{2} {3} -c -o {4}'.format(
            sdk, version_flag, additional_flags, src_file, object_file))
    os.system('xcrun -sdk {0} metallib {1} -o compiled/default.{2}.metallib'.format(
        sdk, object_files, variant))


# Append one preprocessor-guarded section to the autogen header: the guard
# line, the metallib for `variant` as a byte array, and its `<name>_len` constant.
def _append_metallib_section(guard_line, variable_name, variant, autogen_inc):
    os.system('echo "{0}" >> {1}'.format(guard_line, autogen_inc))
    append_file_as_byte_array_string(variable_name,
                                     'compiled/default.{0}.metallib'.format(variant),
                                     autogen_inc)
    os.system('echo "constexpr\nsize_t {0}_len = sizeof({0});" >> {1}'.format(
        variable_name, autogen_inc))


# Compile metal shader.
# mac_version: target version of macOS
# ios_version: target version of iOS
# variable_name: name of C++ variable that will hold the compiled binary data as a C array.
# additional_flags: additional shader compiler flags
# src_files: metal source files
def gen_precompiled_shaders(mac_version, ios_version, variable_name, additional_flags, src_files):
    """Build the macOS, iOS and iOS-simulator metallibs and append them to the autogen header.

    Each source file is compiled to its own .air object and the objects are
    linked into one metallib per platform. The three binaries are then appended
    to compiled/mtl_default_shaders_autogen.inc as byte arrays named
    `variable_name`, each inside its own TARGET_OS_* preprocessor branch, and
    the intermediate compiled/default.* files are removed. Paths are relative
    to the current working directory. Exit statuses of the shell commands are
    not checked.
    """
    autogen_inc = 'compiled/mtl_default_shaders_autogen.inc'
    mac_variant = '{0}'.format(mac_version)
    ios_variant = 'ios.{0}'.format(ios_version)
    ios_sim_variant = 'ios_sim.{0}'.format(ios_version)

    print('Generating default shaders with flags=\'{0}\' ...'.format(additional_flags))

    # Mac version's compilation
    print('Compiling macos {0} version of default shaders ...'.format(mac_version))
    _build_metallib('macosx', '-mmacosx-version-min={0} '.format(mac_version), mac_variant,
                    additional_flags, src_files)

    # iOS device version's compilation
    print('Compiling ios {0} version of default shaders ...'.format(ios_version))
    _build_metallib('iphoneos', '-mios-version-min={0} '.format(ios_version), ios_variant,
                    additional_flags, src_files)

    # iOS simulator version's compilation (no minimum-version flag is passed)
    print('Compiling ios {0} simulator version of default shaders ...'.format(ios_version))
    _build_metallib('iphonesimulator', '', ios_sim_variant, additional_flags, src_files)

    # Mac version's byte array string
    _append_metallib_section('#if TARGET_OS_OSX || TARGET_OS_MACCATALYST\n', variable_name,
                             mac_variant, autogen_inc)

    # iOS simulator version's byte array string. It is emitted before the plain
    # TARGET_OS_IOS branch so the more specific condition is tested first.
    _append_metallib_section(
        '\n#elif TARGET_OS_IOS && TARGET_OS_SIMULATOR // TARGET_OS_OSX || TARGET_OS_MACCATALYST\n',
        variable_name, ios_sim_variant, autogen_inc)

    # iOS version's byte array string
    _append_metallib_section(
        '\n#elif TARGET_OS_IOS // TARGET_OS_OSX || TARGET_OS_MACCATALYST\n', variable_name,
        ios_variant, autogen_inc)

    os.system(
        'echo "#endif // TARGET_OS_OSX || TARGET_OS_MACCATALYST\n" >> {0}'.format(autogen_inc))

    # Remove the intermediate .air and .metallib files.
    os.system('rm -rfv compiled/default.*')
def main():
src_files = ['blit.metal', 'clear.metal', 'gen_indices.metal']
# auto_script parameters.
if len(sys.argv) > 1:
inputs = [
'master_source.metal', 'blit.metal', 'clear.metal', 'gen_indices.metal', 'common.h',
'constants.h'
]
outputs = ['compiled/mtl_default_shaders.inc', 'mtl_default_shaders_src_autogen.inc']
inputs = src_files + ['common.h', 'constants.h']
outputs = ['compiled/mtl_default_shaders_autogen.inc']
if sys.argv[1] == 'inputs':
print ','.join(inputs)
@@ -42,88 +153,26 @@ def main():
os.chdir(sys.path[0])
print('Compiling macos version of default shaders ...')
os.system(
'xcrun -sdk macosx metal master_source.metal -mmacosx-version-min=10.13 -c -o compiled/default.air'
)
os.system('xcrun -sdk macosx metallib compiled/default.air -o compiled/default.metallib')
print('Compiling ios version of default shaders ...')
os.system(
'xcrun -sdk iphoneos metal master_source.metal -mios-version-min=11.0 -c -o compiled/default.ios.air'
)
os.system(
'xcrun -sdk iphoneos metallib compiled/default.ios.air -o compiled/default.ios.metallib')
print('Compiling ios simulator version of default shaders ...')
os.system(
'xcrun -sdk iphonesimulator metal master_source.metal -c -o compiled/default.ios_sim.air')
os.system(
'xcrun -sdk iphonesimulator metallib compiled/default.ios_sim.air -o compiled/default.ios_sim.metallib'
)
boilerplate_code = template_header_boilerplate.format(
script_name=sys.argv[0], copyright_year=datetime.today().year)
os.system("echo \"{0}\" > compiled/mtl_default_shaders.inc".format(boilerplate_code))
# -------- Compile shaders -----------
# boilerplate code
os.system("echo \"{0}\" > compiled/mtl_default_shaders_autogen.inc".format(boilerplate_code))
os.system(
'echo "// Compiled binary for Metal default shaders.\n\n" >> compiled/mtl_default_shaders.inc'
'echo "// Compiled binary for Metal default shaders.\n\n" >> compiled/mtl_default_shaders_autogen.inc'
)
os.system('echo "#include <TargetConditionals.h>\n\n" >> compiled/mtl_default_shaders.inc')
os.system(
'echo "#include <TargetConditionals.h>\n\n" >> compiled/mtl_default_shaders_autogen.inc')
# Mac version
os.system(
'echo "#if TARGET_OS_OSX || TARGET_OS_MACCATALYST\n" >> compiled/mtl_default_shaders.inc')
os.system('echo "// clang-format off" >> compiled/mtl_default_shaders_autogen.inc')
os.system('echo "constexpr" >> compiled/mtl_default_shaders.inc')
os.system('xxd -i compiled/default.metallib >> compiled/mtl_default_shaders.inc')
# pre-compiled shaders
gen_precompiled_shaders(10.13, 11.0, 'compiled_default_metallib', '', src_files)
gen_precompiled_shaders(10.13, 11.0, 'compiled_default_metallib_debug',
'-gline-tables-only -MO', src_files)
# iOS simulator version
os.system(
'echo "\n#elif TARGET_OS_SIMULATOR // TARGET_OS_OSX || TARGET_OS_MACCATALYST\n" >> compiled/mtl_default_shaders.inc'
)
os.system(
'echo "#define compiled_default_metallib compiled_default_ios_sim_metallib" >> compiled/mtl_default_shaders.inc'
)
os.system(
'echo "#define compiled_default_metallib_len compiled_default_ios_sim_metallib_len\n" >> compiled/mtl_default_shaders.inc'
)
os.system('echo "constexpr" >> compiled/mtl_default_shaders.inc')
os.system('xxd -i compiled/default.ios_sim.metallib >> compiled/mtl_default_shaders.inc')
# iOS version
os.system(
'echo "\n#elif TARGET_OS_IOS // TARGET_OS_OSX || TARGET_OS_MACCATALYST\n" >> compiled/mtl_default_shaders.inc'
)
os.system(
'echo "#define compiled_default_metallib compiled_default_ios_metallib" >> compiled/mtl_default_shaders.inc'
)
os.system(
'echo "#define compiled_default_metallib_len compiled_default_ios_metallib_len\n" >> compiled/mtl_default_shaders.inc'
)
os.system('echo "constexpr" >> compiled/mtl_default_shaders.inc')
os.system('xxd -i compiled/default.ios.metallib >> compiled/mtl_default_shaders.inc')
os.system(
'echo "#endif // TARGET_OS_OSX || TARGET_OS_MACCATALYST\n" >> compiled/mtl_default_shaders.inc'
)
# Write full source string for debug purpose
os.system("echo \"{0}\" > mtl_default_shaders_src_autogen.inc".format(boilerplate_code))
os.system(
'echo "// C++ string version of Metal default shaders for debug purpose.\n\n" >> mtl_default_shaders_src_autogen.inc'
)
os.system(
'echo "\n\nconstexpr char default_metallib_src[] = R\\"(" >> mtl_default_shaders_src_autogen.inc'
)
os.system('echo "#include <metal_stdlib>" >> mtl_default_shaders_src_autogen.inc')
os.system('echo "#include <simd/simd.h>" >> mtl_default_shaders_src_autogen.inc')
os.system(
'clang -xc++ -E -DSKIP_STD_HEADERS master_source.metal >> mtl_default_shaders_src_autogen.inc'
)
os.system('echo ")\\";" >> mtl_default_shaders_src_autogen.inc')
os.system('echo "// clang-format on" >> compiled/mtl_default_shaders_autogen.inc')
if __name__ == '__main__':

View File

@@ -1,11 +0,0 @@
//
// Copyright 2019 The ANGLE Project. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// master_source.metal:
// Includes all other metal source code in one file.
#include "clear.metal"
#include "blit.metal"
#include "gen_indices.metal"

View File

@@ -1,427 +0,0 @@
// GENERATED FILE - DO NOT EDIT.
// Generated by gen_mtl_internal_shaders.py
//
// Copyright 2020 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// C++ string version of Metal default shaders for debug purpose.
constexpr char default_metallib_src[] = R"(
#include <metal_stdlib>
#include <simd/simd.h>
# 1 "master_source.metal"
# 1 "<built-in>" 1
# 1 "<built-in>" 3
# 376 "<built-in>" 3
# 1 "<command line>" 1
# 1 "<built-in>" 2
# 1 "master_source.metal" 2
# 1 "./clear.metal" 1
# 1 "./common.h" 1
# 16 "./common.h"
# 1 "./constants.h" 1
# 11 "./constants.h"
namespace rx
{
namespace mtl_shader
{
enum
{
kTextureType2D = 0,
kTextureType2DMultisample = 1,
kTextureType2DArray = 2,
kTextureTypeCube = 3,
kTextureType3D = 4,
kTextureTypeCount = 5,
};
}
}
# 17 "./common.h" 2
using namespace metal;
constant float2 gCorners[3] = {float2(-1.0f, -1.0f), float2(3.0f, -1.0f), float2(-1.0f, 3.0f)};
fragment float4 dummyFS()
{
return float4(0, 0, 0, 0);
}
# 9 "./clear.metal" 2
struct ClearParams
{
float4 clearColor;
float clearDepth;
};
vertex float4 clearVS(unsigned int vid [[ vertex_id ]],
constant ClearParams &clearParams [[buffer(0)]])
{
return float4(gCorners[vid], clearParams.clearDepth, 1.0);
}
fragment float4 clearFS(constant ClearParams &clearParams [[buffer(0)]])
{
return clearParams.clearColor;
}
# 10 "master_source.metal" 2
# 1 "./blit.metal" 1
# 10 "./blit.metal"
using namespace rx::mtl_shader;
constant bool kPremultiplyAlpha [[function_constant(4)]];
constant bool kUnmultiplyAlpha [[function_constant(5)]];
constant int kSourceTextureType [[function_constant(6)]];
constant bool kSourceTextureType2D = kSourceTextureType == kTextureType2D;
constant bool kSourceTextureType2DArray = kSourceTextureType == kTextureType2DArray;
constant bool kSourceTextureType2DMS = kSourceTextureType == kTextureType2DMultisample;
constant bool kSourceTextureTypeCube = kSourceTextureType == kTextureTypeCube;
constant bool kSourceTextureType3D = kSourceTextureType == kTextureType3D;
struct BlitParams
{
float2 srcTexCoords[3];
int srcLevel;
int srcLayer;
bool dstFlipViewportX;
bool dstFlipViewportY;
bool dstLuminance;
};
struct BlitVSOut
{
float4 position [[position]];
float2 texCoords [[user(locn1)]];
};
vertex BlitVSOut blitVS(unsigned int vid [[vertex_id]], constant BlitParams &options [[buffer(0)]])
{
BlitVSOut output;
output.position = float4(gCorners[vid], 0.0, 1.0);
output.texCoords = options.srcTexCoords[vid];
if (options.dstFlipViewportX)
{
output.position.x = -output.position.x;
}
if (!options.dstFlipViewportY)
{
output.position.y = -output.position.y;
}
return output;
}
static inline float3 cubeTexcoords(float2 texcoords, int face)
{
texcoords = 2.0 * texcoords - 1.0;
switch (face)
{
case 0:
return float3(1.0, -texcoords.y, -texcoords.x);
case 1:
return float3(-1.0, -texcoords.y, texcoords.x);
case 2:
return float3(texcoords.x, 1.0, texcoords.y);
case 3:
return float3(texcoords.x, -1.0, -texcoords.y);
case 4:
return float3(texcoords.x, -texcoords.y, 1.0);
case 5:
return float3(-texcoords.x, -texcoords.y, -1.0);
}
return float3(texcoords, 0);
}
template <typename T>
static inline vec<T, 4> blitSampleTextureMS(texture2d_ms<T> srcTexture, float2 texCoords)
{
uint2 dimens(srcTexture.get_width(), srcTexture.get_height());
uint2 coords = uint2(texCoords * float2(dimens));
uint samples = srcTexture.get_num_samples();
vec<T, 4> output(0);
for (uint sample = 0; sample < samples; ++sample)
{
output += srcTexture.read(coords, sample);
}
output = output / samples;
return output;
}
template <typename T>
static inline vec<T, 4> blitSampleTexture3D(texture3d<T> srcTexture,
sampler textureSampler,
float2 texCoords,
constant BlitParams &options)
{
uint depth = srcTexture.get_depth(options.srcLevel);
float zCoord = (float(options.srcLayer) + 0.5) / float(depth);
return srcTexture.sample(textureSampler, float3(texCoords, zCoord), level(options.srcLevel));
}
# 130 "./blit.metal"
template <typename T>
static inline vec<T, 4> blitReadTexture(BlitVSOut input [[stage_in]], texture2d<T> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<T> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<T> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<T> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<T> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]])
{
vec<T, 4> output;
switch (kSourceTextureType)
{
case kTextureType2D:
output = srcTexture2d.sample(textureSampler, input.texCoords, level(options.srcLevel));
break;
case kTextureType2DArray:
output = srcTexture2dArray.sample(textureSampler, input.texCoords, options.srcLayer,
level(options.srcLevel));
break;
case kTextureType2DMultisample:
output = blitSampleTextureMS(srcTexture2dMS, input.texCoords);
break;
case kTextureTypeCube:
output = srcTextureCube.sample(textureSampler,
cubeTexcoords(input.texCoords, options.srcLayer),
level(options.srcLevel));
break;
case kTextureType3D:
output = blitSampleTexture3D(srcTexture3d, textureSampler, input.texCoords, options);
break;
}
if (kPremultiplyAlpha)
{
output.xyz *= output.a;
}
else if (kUnmultiplyAlpha)
{
if (output.a != 0.0)
{
output.xyz /= output.a;
}
}
if (options.dstLuminance)
{
output.g = output.b = output.r;
}
return output;
}
fragment float4 blitFS(BlitVSOut input [[stage_in]], texture2d<float> srcTexture2d [[texture(0), function_constant(kSourceTextureType2D)]], texture2d_array<float> srcTexture2dArray [[texture(0), function_constant(kSourceTextureType2DArray)]], texture2d_ms<float> srcTexture2dMS [[texture(0), function_constant(kSourceTextureType2DMS)]], texturecube<float> srcTextureCube [[texture(0), function_constant(kSourceTextureTypeCube)]], texture3d<float> srcTexture3d [[texture(0), function_constant(kSourceTextureType3D)]], sampler textureSampler [[sampler(0)]], constant BlitParams &options [[buffer(0)]])
{
return blitReadTexture(input, srcTexture2d, srcTexture2dArray, srcTexture2dMS, srcTextureCube, srcTexture3d, textureSampler, options);
}
# 11 "master_source.metal" 2
# 1 "./gen_indices.metal" 1
constant bool kSourceBufferAligned[[function_constant(0)]];
constant bool kSourceIndexIsU8[[function_constant(1)]];
constant bool kSourceIndexIsU16[[function_constant(2)]];
constant bool kSourceIndexIsU32[[function_constant(3)]];
constant bool kSourceBufferUnaligned = !kSourceBufferAligned;
constant bool kUseSourceBufferU8 = kSourceIndexIsU8 || kSourceBufferUnaligned;
constant bool kUseSourceBufferU16 = kSourceIndexIsU16 && kSourceBufferAligned;
constant bool kUseSourceBufferU32 = kSourceIndexIsU32 && kSourceBufferAligned;
struct IndexConversionParams
{
uint32_t srcOffset;
uint32_t indexCount;
};
inline ushort getIndexAligned(constant ushort *inputAligned, uint offset, uint idx)
{
return inputAligned[offset / 2 + idx];
}
inline uint getIndexAligned(constant uint *inputAligned, uint offset, uint idx)
{
return inputAligned[offset / 4 + idx];
}
inline uchar getIndexAligned(constant uchar *input, uint offset, uint idx)
{
return input[offset + idx];
}
inline ushort getIndexUnalignedU16(constant uchar *input, uint offset, uint idx)
{
ushort inputLo = input[offset + 2 * idx];
ushort inputHi = input[offset + 2 * idx + 1];
return inputLo | (inputHi << 8);
}
inline uint getIndexUnalignedU32(constant uchar *input, uint offset, uint idx)
{
uint input0 = input[offset + 4 * idx];
uint input1 = input[offset + 4 * idx + 1];
uint input2 = input[offset + 4 * idx + 2];
uint input3 = input[offset + 4 * idx + 3];
return input0 | (input1 << 8) | (input2 << 16) | (input3 << 24);
}
kernel void convertIndexU8ToU16(uint idx[[thread_position_in_grid]],
constant IndexConversionParams &options[[buffer(0)]],
constant uchar *input[[buffer(1)]],
device ushort *output[[buffer(2)]])
{
if (idx >= options.indexCount) { return; };
output[idx] = getIndexAligned(input, options.srcOffset, idx);
}
kernel void convertIndexU16(
uint idx[[thread_position_in_grid]],
constant IndexConversionParams &options[[buffer(0)]],
constant uchar *input[[ buffer(1), function_constant(kSourceBufferUnaligned) ]],
constant ushort *inputAligned[[ buffer(1), function_constant(kSourceBufferAligned) ]],
device ushort *output[[buffer(2)]])
{
if (idx >= options.indexCount) { return; };
ushort value;
if (kSourceBufferAligned)
{
value = getIndexAligned(inputAligned, options.srcOffset, idx);
}
else
{
value = getIndexUnalignedU16(input, options.srcOffset, idx);
}
output[idx] = value;
}
kernel void convertIndexU32(
uint idx[[thread_position_in_grid]],
constant IndexConversionParams &options[[buffer(0)]],
constant uchar *input[[ buffer(1), function_constant(kSourceBufferUnaligned) ]],
constant uint *inputAligned[[ buffer(1), function_constant(kSourceBufferAligned) ]],
device uint *output[[buffer(2)]])
{
if (idx >= options.indexCount) { return; };
uint value;
if (kSourceBufferAligned)
{
value = getIndexAligned(inputAligned, options.srcOffset, idx);
}
else
{
value = getIndexUnalignedU32(input, options.srcOffset, idx);
}
output[idx] = value;
}
struct TriFanArrayParams
{
uint firstVertex;
uint vertexCountFrom3rd;
};
kernel void genTriFanIndicesFromArray(uint idx[[thread_position_in_grid]],
constant TriFanArrayParams &options[[buffer(0)]],
device uint *output[[buffer(2)]])
{
if (idx >= options.vertexCountFrom3rd) { return; };
uint vertexIdx = options.firstVertex + 2 + idx;
output[3 * idx] = options.firstVertex;
output[3 * idx + 1] = vertexIdx - 1;
output[3 * idx + 2] = vertexIdx;
}
inline uint getIndexU32(uint offset,
uint idx,
constant uchar *inputU8[[function_constant(kUseSourceBufferU8)]],
constant ushort *inputU16[[function_constant(kUseSourceBufferU16)]],
constant uint *inputU32[[function_constant(kUseSourceBufferU32)]])
{
if (kUseSourceBufferU8)
{
if (kSourceIndexIsU16)
{
return getIndexUnalignedU16(inputU8, offset, idx);
}
else if (kSourceIndexIsU32)
{
return getIndexUnalignedU32(inputU8, offset, idx);
}
return getIndexAligned(inputU8, offset, idx);
}
else if (kUseSourceBufferU16)
{
return getIndexAligned(inputU16, offset, idx);
}
else if (kUseSourceBufferU32)
{
return getIndexAligned(inputU32, offset, idx);
}
return 0;
}
kernel void genTriFanIndicesFromElements(
uint idx[[thread_position_in_grid]],
constant IndexConversionParams &options[[buffer(0)]],
constant uchar *inputU8[[ buffer(1), function_constant(kUseSourceBufferU8) ]],
constant ushort *inputU16[[ buffer(1), function_constant(kUseSourceBufferU16) ]],
constant uint *inputU32[[ buffer(1), function_constant(kUseSourceBufferU32) ]],
device uint *output[[buffer(2)]])
{
if (idx >= options.indexCount) { return; };
uint elemIdx = 2 + idx;
output[3 * idx] = getIndexU32(options.srcOffset, 0, inputU8, inputU16, inputU32);
output[3 * idx + 1] = getIndexU32(options.srcOffset, elemIdx - 1, inputU8, inputU16, inputU32);
output[3 * idx + 2] = getIndexU32(options.srcOffset, elemIdx, inputU8, inputU16, inputU32);
}
# 12 "master_source.metal" 2
)";