build: arm target support

2026-01-04 18:09:53 +03:00 · 2021-04-25 23:36:09 +03:00
parent e794dbcbb1
commit 50a93ce91a
51 changed files with 8210 additions and 4386 deletions
--- a/public/bitmap/imageformat.h
+++ b/public/bitmap/imageformat.h
@@ -28,7 +28,9 @@ typedef enum _D3DFORMAT D3DFORMAT;
 //-----------------------------------------------------------------------------

 // don't bitch that inline functions aren't used!!!!
+#ifdef _WIN32
 #pragma warning(disable : 4514)
+#endif

 enum ImageFormat 
 {
--- a/public/materialsystem/imesh.h
+++ b/public/materialsystem/imesh.h
@@ -1156,6 +1156,9 @@ inline void CVertexBuilder::FastAdvanceNVertices( int n )
 //-----------------------------------------------------------------------------
 inline void CVertexBuilder::FastVertex( const ModelVertexDX7_t &vertex )
 {
+#ifdef __arm__
+	FastVertexSSE( vertex );
+#else
 	Assert( m_CompressionType == VERTEX_COMPRESSION_NONE ); // FIXME: support compressed verts if needed
 	Assert( m_nCurrentVertex < m_nMaxVertexCount );

@@ -1213,6 +1216,7 @@ inline void CVertexBuilder::FastVertex( const ModelVertexDX7_t &vertex )
 	m_bWrittenNormal   = false;
 	m_bWrittenUserData = false;
 #endif
+#endif
 }

 inline void CVertexBuilder::FastVertexSSE( const ModelVertexDX7_t &vertex )
@@ -1322,6 +1326,9 @@ inline void CVertexBuilder::Fast4VerticesSSE(

 inline void CVertexBuilder::FastVertex( const ModelVertexDX8_t &vertex )
 {
+#ifdef __arm__
+	FastVertexSSE( vertex );
+#else
 	Assert( m_CompressionType == VERTEX_COMPRESSION_NONE ); // FIXME: support compressed verts if needed
 	Assert( m_nCurrentVertex < m_nMaxVertexCount );

@@ -1386,8 +1393,10 @@ inline void CVertexBuilder::FastVertex( const ModelVertexDX8_t &vertex )
 	m_bWrittenNormal   = false;
 	m_bWrittenUserData = false;
 #endif
+#endif
 }

+
 inline void CVertexBuilder::FastVertexSSE( const ModelVertexDX8_t &vertex )
 {
 	Assert( m_CompressionType == VERTEX_COMPRESSION_NONE ); // FIXME: support compressed verts if needed
@@ -1414,7 +1423,8 @@ inline void CVertexBuilder::FastVertexSSE( const ModelVertexDX8_t &vertex )
 #elif defined(GNUC)
 	const void *pRead = &vertex;
 	void *pCurrPos = m_pCurrPosition;
-	__asm__ __volatile__ (
+
+/*	__asm__ __volatile__ (
 						  "movaps (%0), %%xmm0\n"
 						  "movaps 16(%0), %%xmm1\n"
 						  "movaps 32(%0), %%xmm2\n"
@@ -1422,8 +1432,17 @@ inline void CVertexBuilder::FastVertexSSE( const ModelVertexDX8_t &vertex )
 						  "movntps %%xmm0, (%1)\n"
 						  "movntps %%xmm1, 16(%1)\n"
 						  "movntps %%xmm2, 32(%1)\n"
-						  "movntps %%xmm3, 48(%1)\n"						  
-						  :: "r" (pRead), "r" (pCurrPos) : "memory");
+						  "movntps %%xmm3, 48(%1)\n"
+						  :: "r" (pRead), "r" (pCurrPos) : "memory"); */
+
+	__m128 m1 = _mm_load_ps( (const float *)pRead );		// offsets below are in floats: +4 floats == +16 bytes
+	__m128 m2 = _mm_load_ps( (const float *)pRead + 4 );	// (arithmetic directly on void* is a GNU extension, ill-formed in ISO C++)
+	__m128 m3 = _mm_load_ps( (const float *)pRead + 8 );
+	__m128 m4 = _mm_load_ps( (const float *)pRead + 12 );
+	_mm_stream_ps( (float *)pCurrPos, m1 );			// streaming stores, same as the movntps sequence this replaces
+	_mm_stream_ps( (float *)pCurrPos + 4, m2 );
+	_mm_stream_ps( (float *)pCurrPos + 8, m3 );
+	_mm_stream_ps( (float *)pCurrPos + 12, m4 );
 #else
 	Error( "Implement CMeshBuilder::FastVertexSSE((dx8)" );
 #endif
--- a/public/mathlib/mathlib.h
+++ b/public/mathlib/mathlib.h
@@ -458,11 +458,7 @@ void inline SinCos( float radians, float *sine, float *cosine )
 	*sine = sin( radians );
 	*cosine = cos( radians );
 #elif defined( POSIX )
-	double __cosr, __sinr;
-	__asm ("fsincos" : "=t" (__cosr), "=u" (__sinr) : "0" (radians));
-
-  	*sine = __sinr;
-  	*cosine = __cosr;
+	*sine = sinf( radians ); *cosine = cosf( radians );	// sincosf() is a GNU extension that not every POSIX libc declares
 #endif
 }

@@ -1217,6 +1213,8 @@ FORCEINLINE int RoundFloatToInt(float f)
 	};
 	flResult = __fctiw( f );
 	return pResult[1];
+#elif defined (__arm__)
+	return (int)lrintf( f );	// rounds to nearest in the current FP mode; (int)(f + 0.5f) truncated toward zero and mis-rounded negatives (-1.7f -> -1)
 #else
 #error Unknown architecture
 #endif
@@ -1247,8 +1245,9 @@ FORCEINLINE unsigned long RoundFloatToUnsignedLong(float f)
 	Assert( pIntResult[1] >= 0 );
 	return pResult[1];
 #else  // !X360
-	
-#if defined( PLATFORM_WINDOWS_PC64 )
+#ifdef __arm__
+        return (unsigned long)(f + 0.5f);
+#elif defined( PLATFORM_WINDOWS_PC64 )
 	uint nRet = ( uint ) f;
 	if ( nRet & 1 )
 	{
--- a/public/mathlib/ssemath.h
+++ b/public/mathlib/ssemath.h
@@ -8,6 +8,8 @@

 #if defined( _X360 )
 #include <xboxmath.h>
+#elif defined(__arm__)
+#include "sse2neon.h"
 #else
 #include <xmmintrin.h>
 #endif
@@ -21,7 +23,7 @@
 #define USE_STDC_FOR_SIMD 0
 #endif

-#if (!defined(_X360) && (USE_STDC_FOR_SIMD == 0))
+#if (!defined (__arm__) && !defined(_X360) && (USE_STDC_FOR_SIMD == 0))
 #define _SSE1 1
 #endif

--- a/public/mathlib/vector.h
+++ b/public/mathlib/vector.h
@@ -22,7 +22,8 @@
 // For rand(). We really need a library!
 #include <stdlib.h>

-#ifndef _X360
+#if defined(__SSE__) || defined(_M_IX86_FP)
+#define USE_SSE
 // For MMX intrinsics
 #include <xmmintrin.h>
 #endif
@@ -209,10 +210,9 @@ private:

 FORCEINLINE void NetworkVarConstruct( Vector &v ) { v.Zero(); }

-
-#define USE_M64S ( ( !defined( _X360 ) ) )
-
-
+#ifdef USE_SSE
+#define USE_M64S 1	// keep a value: the macro this replaces expanded to an expression, so both #if and #ifdef users keep working
+#endif

 //=========================================================
 // 4D Short Vector (aligned on 8-byte boundary)
--- a/public/mathlib/vector4d.h
+++ b/public/mathlib/vector4d.h
@@ -16,7 +16,7 @@
 #include <math.h>
 #include <stdlib.h>		// For rand(). We really need a library!
 #include <float.h>
-#if !defined( _X360 )
+#if defined(__SSE__) || defined(_M_IX86_FP)
 #include <xmmintrin.h>	// For SSE
 #endif
 #include "basetypes.h"	// For vec_t, put this somewhere else?
@@ -141,8 +141,10 @@ public:
 	inline void Set( vec_t X, vec_t Y, vec_t Z, vec_t W );
 	inline void InitZero( void );

+#ifndef __arm__
 	inline __m128 &AsM128() { return *(__m128*)&x; }
 	inline const __m128 &AsM128() const { return *(const __m128*)&x; } 
+#endif

 private:
 	// No copy constructors allowed if we're in optimal mode
@@ -613,8 +615,10 @@ inline void Vector4DAligned::Set( vec_t X, vec_t Y, vec_t Z, vec_t W )
 }

 inline void Vector4DAligned::InitZero( void )
-{ 
-#if !defined( _X360 )
+{
+#if defined (__arm__)
+	x = y = z = w = 0;
+#elif !defined( _X360 )
 	this->AsM128() = _mm_set1_ps( 0.0f );
 #else
 	this->AsM128() = __vspltisw( 0 );
@@ -625,7 +629,7 @@ inline void Vector4DAligned::InitZero( void )
 inline void Vector4DMultiplyAligned( Vector4DAligned const& a, Vector4DAligned const& b, Vector4DAligned& c )
 {
 	Assert( a.IsValid() && b.IsValid() );
-#if !defined( _X360 )
+#if !defined( _X360 ) || defined (__arm__)
 	c.x = a.x * b.x;
 	c.y = a.y * b.y;
 	c.z = a.z * b.z;
@@ -639,7 +643,7 @@ inline void Vector4DWeightMAD( vec_t w, Vector4DAligned const& vInA, Vector4DAli
 {
 	Assert( vInA.IsValid() && vInB.IsValid() && IsFinite(w) );

-#if !defined( _X360 )
+#if !defined( _X360 ) || defined (__arm__)
 	vOutA.x += vInA.x * w;
 	vOutA.y += vInA.y * w;
 	vOutA.z += vInA.z * w;
@@ -660,6 +664,7 @@ inline void Vector4DWeightMAD( vec_t w, Vector4DAligned const& vInA, Vector4DAli
 #endif
 }

+#ifndef __arm__
 inline void Vector4DWeightMADSSE( vec_t w, Vector4DAligned const& vInA, Vector4DAligned& vOutA, Vector4DAligned const& vInB, Vector4DAligned& vOutB )
 {
 	Assert( vInA.IsValid() && vInB.IsValid() && IsFinite(w) );
@@ -681,6 +686,7 @@ inline void Vector4DWeightMADSSE( vec_t w, Vector4DAligned const& vInA, Vector4D
 	vOutB.AsM128() = __vmaddfp( vInB.AsM128(), temp, vOutB.AsM128() );
 #endif
 }
+#endif

 #endif // VECTOR4D_H

--- a/public/saverestoretypes.h
+++ b/public/saverestoretypes.h
@@ -512,27 +512,25 @@ inline const char *CSaveRestoreSegment::StringFromSymbol( int token )
 ///             compilers. Either way, there's no portable intrinsic.

 // Newer GCC versions provide this in this header, older did by default.
-#if !defined( _rotr ) && defined( COMPILER_GCC )
+#if !defined( _rotr ) && defined( COMPILER_GCC ) && !defined( __arm__ )
 #include <x86intrin.h>
 #endif

-#ifdef COMPILER_CLANG
-static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
-_rotr(unsigned int _Value, int _Shift) {
-	_Shift &= 0x1f;
-	return _Shift ? (_Value >> _Shift) | (_Value << (32 - _Shift)) : _Value;
+#if !defined ( _rotr )
+inline unsigned _rotr( unsigned x, unsigned n ) {	// fallback 32-bit rotate-right, compiled only when no _rotr macro is already defined
+	return ( x >> ( n % 32 ) ) | ( x << ( ( 32 - n ) % 32 ) );	// both shift counts are reduced mod 32, so n == 0 never turns into a shift by 32
 }
 #endif

-
 inline unsigned int CSaveRestoreSegment::HashString( const char *pszToken )
 {
 	COMPILE_TIME_ASSERT( sizeof( unsigned int ) == 4 );
 	unsigned int	hash = 0;

 	while ( *pszToken )
+	{
 		hash = _rotr( hash, 4 ) ^ *pszToken++;
-
+	}
 	return hash;
 }

--- a/public/tier0/platform.h
+++ b/public/tier0/platform.h
@@ -713,7 +713,7 @@ typedef void * HINSTANCE;


 // When we port to 64 bit, we'll have to resolve the int, ptr vs size_t 32/64 bit problems...
-#if !defined( _WIN64 )
+#if !defined( _WIN64 ) && defined( _WIN32 )
 #pragma warning( disable : 4267 )	// conversion from 'size_t' to 'int', possible loss of data
 #pragma warning( disable : 4311 )	// pointer truncation from 'char *' to 'int'
 #pragma warning( disable : 4312 )	// conversion from 'unsigned int' to 'memhandle_t' of greater size
@@ -825,9 +825,9 @@ static FORCEINLINE double fsel(double fComparand, double fValGE, double fLT)

 		#endif
 	#endif
-
+#elif defined (__arm__)
+	inline void SetupFPUControlWord() {}
 #else
-
 	inline void SetupFPUControlWord()
 	{
 		__volatile unsigned short int __cw;
@@ -849,7 +849,7 @@ static FORCEINLINE double fsel(double fComparand, double fValGE, double fLT)
 			{
 				double flResult;
 				int pResult[2];
-			};
+			};
 			flResult = __fctiw( f );
 			return ( pResult[1] == 1 );
 		}
@@ -1160,7 +1160,11 @@ PLATFORM_INTERFACE struct tm *		Plat_localtime( const time_t *timep, struct tm *

 inline uint64 Plat_Rdtsc()
 {
-#if defined( _X360 )
+#if defined( __arm__ ) && defined (POSIX)
+	struct timespec t;
+	clock_gettime( CLOCK_MONOTONIC, &t );	// stands in for the counter reads in the other branches; unlike CLOCK_REALTIME it cannot be stepped backwards
+	return ( uint64 )t.tv_sec * 1000000000ULL + ( uint64 )t.tv_nsec;	// nanoseconds
+#elif defined( _X360 )
 	return ( uint64 )__mftb32();
 #elif defined( _WIN64 )
 	return ( uint64 )__rdtsc();
--- a/public/tier0/threadtools.h
+++ b/public/tier0/threadtools.h
@@ -25,6 +25,7 @@
 #ifdef POSIX
 #include <pthread.h>
 #include <errno.h>
+#include <sched.h>
 #define WAIT_OBJECT_0 0
 #define WAIT_TIMEOUT 0x00000102
 #define WAIT_FAILED -1
@@ -141,9 +142,11 @@ inline void ThreadPause()
 #if defined( PLATFORM_WINDOWS_PC )
 	// Intrinsic for __asm pause; from <intrin.h>
 	_mm_pause();
-#elif POSIX
+#elif POSIX && ( defined( __i386__ ) || defined( __x86_64__ ) )	// "pause" is valid on 32- and 64-bit x86; x86_64 must not fall through to the #error
 	__asm __volatile( "pause" );
 #elif defined( _X360 )
+#elif defined(__arm__)
+	sched_yield();
 #else
 #error "implement me"
 #endif
--- a/public/tier1/convar.h
+++ b/public/tier1/convar.h
@@ -627,7 +627,9 @@ void ConVar_PrintDescription( const ConCommandBase *pVar );
 //-----------------------------------------------------------------------------
 // Purpose: Utility class to quickly allow ConCommands to call member methods
 //-----------------------------------------------------------------------------
+#ifdef _WIN32
 #pragma warning (disable : 4355 )
+#endif

 template< class T >
 class CConCommandMemberAccessor : public ConCommand, public ICommandCallback, public ICommandCompletionCallback
@@ -674,8 +676,9 @@ private:
 	FnMemberCommandCompletionCallback_t m_CompletionFunc;
 };

+#ifdef _WIN32
 #pragma warning ( default : 4355 )
-
+#endif

 //-----------------------------------------------------------------------------
 // Purpose: Utility macros to quicky generate a simple console command
--- a/public/tier1/utlblockmemory.h
+++ b/public/tier1/utlblockmemory.h
@@ -21,8 +21,10 @@
 #include "tier0/memalloc.h"
 #include "tier0/memdbgon.h"

+#ifdef _WIN32
 #pragma warning (disable:4100)
 #pragma warning (disable:4514)
+#endif

 //-----------------------------------------------------------------------------

--- a/public/tier1/utlfixedmemory.h
+++ b/public/tier1/utlfixedmemory.h
@@ -20,8 +20,10 @@
 #include "tier0/memalloc.h"
 #include "tier0/memdbgon.h"

+#ifdef _WIN32
 #pragma warning (disable:4100)
 #pragma warning (disable:4514)
+#endif

 //-----------------------------------------------------------------------------

--- a/public/tier1/utllinkedlist.h
+++ b/public/tier1/utllinkedlist.h
@@ -550,8 +550,10 @@ inline I  CUtlLinkedList<T,S,ML,I,M>::PrivateNext( I i ) const
 // Are nodes in the list or valid?
 //-----------------------------------------------------------------------------

+#ifdef _WIN32
 #pragma warning(push)
 #pragma warning( disable: 4310 ) // Allows "(I)(S)M::INVALID_INDEX" below
+#endif
 template <class T, class S, bool ML, class I, class M>
 inline bool CUtlLinkedList<T,S,ML,I,M>::IndexInRange( I index ) // Static method
 {
@@ -570,7 +572,9 @@ inline bool CUtlLinkedList<T,S,ML,I,M>::IndexInRange( I index ) // Static method

 	return ( ( (S)index == index ) && ( (S)index != InvalidIndex() ) );
 }
+#ifdef _WIN32
 #pragma warning(pop)
+#endif

 template <class T, class S, bool ML, class I, class M>
 inline bool CUtlLinkedList<T,S,ML,I,M>::IsValidIndex( I i ) const  
--- a/public/tier1/utlmemory.h
+++ b/public/tier1/utlmemory.h
@@ -22,9 +22,10 @@
 #include "tier0/memalloc.h"
 #include "tier0/memdbgon.h"

+#ifdef _WIN32
 #pragma warning (disable:4100)
 #pragma warning (disable:4514)
-
+#endif

 //-----------------------------------------------------------------------------

--- a/public/tier1/utlrbtree.h
+++ b/public/tier1/utlrbtree.h
@@ -664,8 +664,11 @@ inline void CUtlRBTree<T, I, L, M>::SetColor( I i, typename CUtlRBTree<T, I, L,
 //-----------------------------------------------------------------------------
 // Allocates/ deallocates nodes
 //-----------------------------------------------------------------------------
+#ifdef _WIN32
 #pragma warning(push)
 #pragma warning(disable:4389) // '==' : signed/unsigned mismatch
+#endif
+
 template < class T, class I, typename L, class M >
 I  CUtlRBTree<T, I, L, M>::NewNode()
 {
@@ -710,7 +713,9 @@ I  CUtlRBTree<T, I, L, M>::NewNode()

 	return elem;
 }
+#ifdef _WIN32
 #pragma warning(pop)
+#endif

 template < class T, class I, typename L, class M >
 void  CUtlRBTree<T, I, L, M>::FreeNode( I i )
--- a/public/tier1/utlvector.h
+++ b/public/tier1/utlvector.h
@@ -324,9 +324,12 @@ public:
 // Especialy useful if you have a lot of vectors that are sparse, or if you're
 // carefully packing holders of vectors
 //-----------------------------------------------------------------------------
+
+#ifdef _WIN32
 #pragma warning(push)
 #pragma warning(disable : 4200) // warning C4200: nonstandard extension used : zero-sized array in struct/union
 #pragma warning(disable : 4815 ) // warning C4815: 'staticData' : zero-sized array in stack object will have no elements
+#endif

 class CUtlVectorUltraConservativeAllocator
 {
@@ -573,7 +576,9 @@ private:
 	}
 };

+#ifdef _WIN32
 #pragma warning(pop)
+#endif

 // Make sure nobody adds multiple inheritance and makes this class bigger.
 COMPILE_TIME_ASSERT( sizeof(CUtlVectorUltraConservative<int>) == sizeof(void*) );
--- a/public/togl/linuxwin/glmgr.h
+++ b/public/togl/linuxwin/glmgr.h
@@ -214,8 +214,8 @@ struct GLClipPlaneEnable_t		{ GLint		enable;													inline bool operator==(
 struct GLClipPlaneEquation_t	{ GLfloat	x,y,z,w;												inline bool operator==(const GLClipPlaneEquation_t& src)	const { return EQ(x) && EQ(y) && EQ(z) && EQ(w);			} };

 //blend
-struct GLColorMaskSingle_t		{ char		r,g,b,a;												inline bool operator==(const GLColorMaskSingle_t& src)		const { return EQ(r) && EQ(g) && EQ(b) && EQ(a);			} };
-struct GLColorMaskMultiple_t	{ char		r,g,b,a;												inline bool operator==(const GLColorMaskMultiple_t& src)	const { return EQ(r) && EQ(g) && EQ(b) && EQ(a);			} };
+struct GLColorMaskSingle_t		{ signed char		r,g,b,a;												inline bool operator==(const GLColorMaskSingle_t& src)		const { return EQ(r) && EQ(g) && EQ(b) && EQ(a);			} };
+struct GLColorMaskMultiple_t	{ signed char		r,g,b,a;												inline bool operator==(const GLColorMaskMultiple_t& src)	const { return EQ(r) && EQ(g) && EQ(b) && EQ(a);			} };
 struct GLBlendEnable_t			{ GLint		enable;													inline bool operator==(const GLBlendEnable_t& src)			const { return EQ(enable);									} };
 struct GLBlendFactor_t			{ GLenum	srcfactor,dstfactor;									inline bool operator==(const GLBlendFactor_t& src)			const { return EQ(srcfactor) && EQ(dstfactor);				} };
 struct GLBlendEquation_t		{ GLenum	equation;												inline bool operator==(const GLBlendEquation_t& src)		const { return EQ(equation);								} };
@@ -225,7 +225,7 @@ struct GLBlendEnableSRGB_t		{ GLint		enable;													inline bool operator==(
 //depth
 struct GLDepthTestEnable_t		{ GLint		enable;													inline bool operator==(const GLDepthTestEnable_t& src)		const { return EQ(enable);									} };
 struct GLDepthFunc_t			{ GLenum	func;													inline bool operator==(const GLDepthFunc_t& src)			const { return EQ(func);									} };
-struct GLDepthMask_t			{ char		mask;													inline bool operator==(const GLDepthMask_t& src)			const { return EQ(mask);									} };
+struct GLDepthMask_t			{  char		mask;													inline bool operator==(const GLDepthMask_t& src)			const { return EQ(mask);									} };

 //stencil
 struct GLStencilTestEnable_t	{ GLint		enable;													inline bool operator==(const GLStencilTestEnable_t& src)	const { return EQ(enable);									} };
--- a/public/vgui/VGUI.h
+++ b/public/vgui/VGUI.h
@@ -22,6 +22,7 @@
 #endif
 #endif

+#ifdef _WIN32
 #pragma warning( disable: 4800 )	// disables 'performance warning converting int to bool'
 #pragma warning( disable: 4786 )	// disables 'identifier truncated in browser information' warning
 #pragma warning( disable: 4355 )	// disables 'this' : used in base member initializer list
@@ -29,6 +30,7 @@
 #pragma warning( disable: 4514 )	// warning C4514: 'Color::Color' : unreferenced inline function has been removed
 #pragma warning( disable: 4100 )	// warning C4100: 'code' : unreferenced formal parameter
 #pragma warning( disable: 4127 )	// warning C4127: conditional expression is constant
+#endif

 typedef unsigned char  uchar;
 typedef unsigned short ushort;
--- a/public/vstdlib/pch_vstdlib.h
+++ b/public/vstdlib/pch_vstdlib.h
@@ -11,8 +11,9 @@
 // $NoKeywords: $
 //=============================================================================

-
+#ifdef _WIN32
 #pragma warning(disable: 4514)
+#endif

 // First include standard libraries
 #include <stdio.h>
--- a/public/vstdlib/random.h
+++ b/public/vstdlib/random.h
@@ -16,8 +16,10 @@

 #define NTAB 32

+#ifdef _WIN32
 #pragma warning(push)
 #pragma warning( disable:4251 )
+#endif

 //-----------------------------------------------------------------------------
 // A generator of uniformly distributed random numbers
@@ -114,8 +116,9 @@ public:
 //-----------------------------------------------------------------------------
 VSTDLIB_INTERFACE void	InstallUniformRandomStream( IUniformRandomStream *pStream );

-
+#ifdef _WIN32
 #pragma warning(pop)
+#endif

 #endif // VSTDLIB_RANDOM_H