// TKBMS v1.0 -----------------------------------------------------
//
// PLATFORM   : ALL
// PRODUCT   : COMMON
// VISIBILITY   : PUBLIC
//
// ------------------------------------------------------TKBMS v1.0

// Copies numWords 32-bit words from src to dst, lowest address first.
// Nothing is copied when numWords is zero or negative.
void HK_CALL hkString::memCpy4( _Out_writes_bytes_(numWords*4) void* dst, _In_reads_bytes_(numWords*4) const void* src, _In_range_(0, HK_INT32_MAX/4) int numWords)
{
    const hkUint32* const in  = static_cast<const hkUint32*>(src);
    hkUint32* const       out = static_cast<hkUint32*>(dst);

    for (int wordIdx = 0; wordIdx < numWords; ++wordIdx)
    {
        out[wordIdx] = in[wordIdx];
    }
}

// Copies numQuads 16-byte blocks from src to dst, lowest address first.
//
// Two implementations are selected at compile time:
//  - IA32 / X64: both pointers are asserted against HK_NATIVE_ALIGN_CHECK and the data is
//    moved one hkQuadFloat32 at a time.
//  - everything else: the data is moved as four hkUint32 per quad; the alignment assert is
//    4 bytes when HK_ALIGN_RELAX_CHECKS is defined and 16 bytes otherwise.
//
// A numQuads of zero copies nothing on both paths.
void HK_CALL hkString::memCpy16( _Out_writes_bytes_(numQuads*16) void* dst, _In_reads_bytes_(numQuads*16) const void* src, _In_range_(0, HK_INT32_MAX/16) int numQuads)
{
#if defined( HK_ARCH_IA32 ) || defined( HK_ARCH_X64 )
    HK_ASSERT( 0xf021d445, (hkUlong(dst) & HK_NATIVE_ALIGN_CHECK ) == 0, "Unaligned address" );
    HK_ASSERT( 0xf021d446, (hkUlong(src) & HK_NATIVE_ALIGN_CHECK ) == 0, "Unaligned address" );

    const hkQuadFloat32* srcQuad = reinterpret_cast<const hkQuadFloat32*>(src);
    hkQuadFloat32* dstQuad = reinterpret_cast<hkQuadFloat32*>(dst);
    {
        // The do/while only stops when the counter reaches exactly zero, so unlike the
        // fallback path below a negative numQuads is NOT treated as an empty copy here.
        if ( numQuads )
        {
            do
            {
                *(dstQuad++) = *(srcQuad++);
            }
            while(--numQuads);
        }
//      for (int i = numQuads-1; i>=0; i--) // this loop will be detected by the compiler and replaced by a slow ooo call to ::memcpy
//      {
//          *(dstQuad++) = *(srcQuad++);
//      }
    }
#else

#if defined(HK_ALIGN_RELAX_CHECKS) // stack allocated vars not aligned to 16 etc, yet a lot of code uses this call to init the vars. As it is only using uint32, it only has to be 4 byte aligned anyway.
    HK_ASSERT( 0xf022d445, (hkUlong(dst) & 0x03) == 0, "Unaligned address" );
    HK_ASSERT( 0xf022d446, (hkUlong(src) & 0x03) == 0, "Unaligned address" );
#else
    HK_ASSERT( 0xf021d445, (hkUlong(dst) & 0xf) == 0, "Unaligned address" );
    HK_ASSERT( 0xf021d446, (hkUlong(src) & 0xf) == 0, "Unaligned address" );
#endif

    const hkUint32* src32 = reinterpret_cast<const hkUint32*>(src);
    hkUint32* dst32 = reinterpret_cast<      hkUint32*>(dst);
    {
        for (int i = 0; i < numQuads; i++)
        {
            // All four words of the quad are read into locals before any of them is written.
            hkUint32 a = src32[0];
            hkUint32 b = src32[1];
            hkUint32 c = src32[2];
            hkUint32 d = src32[3];
            dst32[0] = a;
            dst32[1] = b;
            dst32[2] = c;
            dst32[3] = d;
            dst32+= 4;
            src32+= 4;
        }
    }
#endif
}

// Copies numQuads 16-byte blocks from src to dst; numQuads is asserted to be greater than zero.
//
// Both implementations use a do/while whose body runs before the counter is tested, so at
// least one quad is always copied, whatever value numQuads holds.
//
//  - IA32 / X64 with HK_CONFIG_SIMD enabled: pointers are asserted against
//    HK_NATIVE_ALIGN_CHECK and the data is moved one hkQuadFloat32 at a time.
//  - otherwise: the data is moved as four hkUint32 per quad; the alignment assert is 4 bytes
//    when HK_ALIGN_RELAX_CHECKS is defined and 16 bytes otherwise.
void HK_CALL hkString::memCpy16NonEmpty( _Out_writes_bytes_(numQuads*16) void* dst, _In_reads_bytes_(numQuads*16) const void* src, _In_range_(0, HK_INT32_MAX/16) int numQuads)
{
    HK_ASSERT( 0xf022d444, numQuads > 0, "Size 0 not allowed" );
#if (defined(HK_ARCH_IA32) || defined(HK_ARCH_X64)) && (HK_CONFIG_SIMD == HK_CONFIG_SIMD_ENABLED)

    HK_ASSERT( 0xf022d445, (hkUlong(dst) & HK_NATIVE_ALIGN_CHECK) == 0, "Unaligned address" );
    HK_ASSERT( 0xf022d446, (hkUlong(src) & HK_NATIVE_ALIGN_CHECK) == 0, "Unaligned address" );
    const hkQuadFloat32* srcQuad = reinterpret_cast<const hkQuadFloat32*>(src);
    hkQuadFloat32* dstQuad = reinterpret_cast<hkQuadFloat32*>(dst);
    {
        // Copy first, test afterwards: the loop ends once the decremented counter is <= 0.
        do
        {
            *(dstQuad++) = *(srcQuad++);
        }
        while ( --numQuads > 0 );
    }

#else

    #if defined(HK_ALIGN_RELAX_CHECKS)  // stack allocated vars not aligned to 16 etc, yet a lot of code uses this call to init the vars. As it is only using uint32, it only has to be 4 byte aligned anyway.
        HK_ASSERT( 0xf022d445, (hkUlong(dst) & 0x03) == 0, "Unaligned address" );
        HK_ASSERT( 0xf022d446, (hkUlong(src) & 0x03) == 0, "Unaligned address" );
    #else
        HK_ASSERT( 0xf022d445, (hkUlong(dst) & 0xf) == 0, "Unaligned address" );
        HK_ASSERT( 0xf022d446, (hkUlong(src) & 0xf) == 0, "Unaligned address" );
    #endif
    const hkUint32* src32 = reinterpret_cast<const hkUint32*>(src);
    hkUint32* dst32 = reinterpret_cast<      hkUint32*>(dst);
    {
        do
        {
            // All four words of the quad are read into locals before any of them is written.
            hkUint32 a = src32[0];
            hkUint32 b = src32[1];
            hkUint32 c = src32[2];
            hkUint32 d = src32[3];
            dst32[0] = a;
            dst32[1] = b;
            dst32[2] = c;
            dst32[3] = d;
            dst32+= 4;
            src32+= 4;
        }
        while ( --numQuads > 0 );
    }
#endif
}

// Copies a compile-time-known number of bytes (size) from src to dst.
//
// size is asserted to be a positive multiple of 16 and at most 192.
//
// With HK_CONFIG_SIMD enabled, sizes up to maxSize (192 on HK_PLATFORM_PS4, 64 elsewhere) are
// copied by a fixed sequence of hkQuadFloat32 loads and stores. Every "if ( size > N )" tests
// the template constant, so only the loads/stores needed for this size take effect.
// All other cases forward to memCpy16NonEmpty with size/16 quads.
template<int size>
void HK_CALL hkString::memCpy16(_Out_writes_bytes_(size) void* dst, _In_reads_bytes_(size) const void* src)
{
    HK_ASSERT_NO_MSG( 0xf0dedf34, ((size & 0xf) == 0) && (size <= 192) && (size > 0));
#if HK_CONFIG_SIMD == HK_CONFIG_SIMD_ENABLED

#   if defined( HK_PLATFORM_PS4 )
    const int maxSize = 192;
#   else
    const int maxSize = 64;
#   endif

    if ( size <= maxSize )
    {
        const hkQuadFloat32* srcQuad = reinterpret_cast<const hkQuadFloat32*>(src);
        hkQuadFloat32*       dstQuad = reinterpret_cast<hkQuadFloat32*>(dst);
        // Four temporaries are reused in rotation: a carries quads 0/4/8, b carries 1/5/9,
        // c carries 2/6/10 and d carries 3/7/11. Each temporary is stored to its destination
        // before it is reloaded with the next source quad.
        hkQuadFloat32 a,b,c,d;
        if ( size >  0) a = srcQuad[0];
        if ( size > 16) b = srcQuad[1];
        if ( size > 32) c = srcQuad[2];
        if ( size > 48) d = srcQuad[3];
        if ( size >  0) dstQuad[0] = a;
        if ( size > 64) a = srcQuad[4];
        if ( size > 16) dstQuad[1] = b;
        if ( size > 80) b = srcQuad[5];
        if ( size > 32) dstQuad[2] = c;
        if ( size > 96) c = srcQuad[6];
        if ( size > 48) dstQuad[3] = d;
        if ( size > 112) d = srcQuad[7];
        if ( size > 64) dstQuad[4] = a;
        if ( size > 128) a = srcQuad[8];
        if ( size > 80) dstQuad[5] = b;
        if ( size > 144) b = srcQuad[9];
        if ( size > 96) dstQuad[6] = c;
        if ( size > 160) c = srcQuad[10];
        if ( size > 112) dstQuad[7] = d;
        if ( size > 176) d = srcQuad[11];
        if ( size > 128) dstQuad[8] = a;
        if ( size > 144) dstQuad[9] = b;
        if ( size > 160) dstQuad[10] = c;
        if ( size > 176) dstQuad[11] = d;
    }
    else
#endif // config simd
    {
        // Looped fallback: used for sizes above maxSize, and always when SIMD is not enabled.
        hkString::memCpy16NonEmpty(dst, src, size/16);
    }
}

// Copies exactly 256 bytes (16 quads of 16 bytes) from src to dst.
//
// On HK_PLATFORM_PS4 the copy is a fixed sequence of hkQuadReal loads and stores using four
// temporaries in rotation. On every other platform it forwards to memCpy16NonEmpty with a
// count of 16 quads.
void HK_CALL hkString::memCpy256(_Out_writes_bytes_(256) void* dst, _In_reads_bytes_(256) const void* src)
{
#if defined( HK_PLATFORM_PS4 )
    const hkQuadReal* srcQuad = reinterpret_cast<const hkQuadReal*>(src);
    hkQuadReal*       dstQuad = reinterpret_cast<hkQuadReal*>(dst);

    // Prime the four temporaries with the first four quads.
    hkQuadReal a = srcQuad[0];
    hkQuadReal b = srcQuad[1];
    hkQuadReal c = srcQuad[2];
    hkQuadReal d = srcQuad[3];
    // Each line stores a temporary and immediately reloads it with the quad four slots ahead.
    dstQuad[0]  = a; a = srcQuad[4];
    dstQuad[1]  = b; b = srcQuad[5];
    dstQuad[2]  = c; c = srcQuad[6];
    dstQuad[3]  = d; d = srcQuad[7];
    dstQuad[4]  = a; a = srcQuad[8];
    dstQuad[5]  = b; b = srcQuad[9];
    dstQuad[6]  = c; c = srcQuad[10];
    dstQuad[7]  = d; d = srcQuad[11];
    dstQuad[8]  = a; a = srcQuad[12];
    dstQuad[9]  = b; b = srcQuad[13];
    dstQuad[10] = c; c = srcQuad[14];
    dstQuad[11] = d; d = srcQuad[15];
    // Flush the last four quads.
    dstQuad[12] = a;
    dstQuad[13] = b;
    dstQuad[14] = c;
    dstQuad[15] = d;
#else
    // 16 quads * 16 bytes = 256 bytes.
    hkString::memCpy16NonEmpty(dst, src, 16);
#endif
}

// Writes value into each of the numWords 32-bit words starting at dst.
// Nothing is written by the fill loop when numWords is zero or negative.
void HK_CALL hkString::memSet4(_Out_writes_bytes_(numWords*4) void* dst, const int value, int numWords)
{
#if defined(HK_PLATFORM_WIIU)
    // NOTE(review): presumably a cache-line pre-zero of the rounded interior of the range so the
    // fill below does not have to fetch the lines first - confirm against the platform SDK.
    const int clearSize = OSRoundDown32B(dst + numWords * 4) - OSRoundUp32B(dst);
    if(clearSize > 0)
    {
        DCZeroRange(reinterpret_cast<void*>( OSRoundUp32B(dst)), clearSize);
    }
#endif
    hkUint32* const words = static_cast<hkUint32*>(dst);
    for (int wordIdx = 0; wordIdx < numWords; ++wordIdx)
    {
        words[wordIdx] = value;
    }
}

// Zeroes numQuads 16-byte blocks starting at dst.
//
// Three implementations are selected at compile time:
//  - HK_PLATFORM_WIIU: DCZeroRange is called on the rounded interior of the range, and the
//    words before and after that interior are zeroed one hkUint32 at a time.
//  - HK_CONFIG_SIMD enabled: dst is asserted against HK_NATIVE_ALIGN_CHECK and a zero
//    hkVector4f is stored into each quad.
//  - otherwise: four hkUint32 zeros are stored per quad; the alignment assert is 4 bytes when
//    HK_ALIGN_RELAX_CHECKS is defined and 16 bytes otherwise.
void HK_CALL hkString::memClear16(_Out_writes_bytes_(numQuads*16) void* dst, int numQuads)
{
#if defined(HK_PLATFORM_WIIU)
    hkUint32* actualStart = reinterpret_cast<hkUint32*>(dst);
    hkUint32* roundedUpStart = reinterpret_cast<hkUint32*>(OSRoundUp32B(dst));
    hkUint32* actualEnd = reinterpret_cast<hkUint32*>(dst + numQuads * 16);
    hkUint32* roundedDownEnd = reinterpret_cast<hkUint32*>(OSRoundDown32B(actualEnd));
    // Byte length of the rounded interior; it is not positive when the range holds no full rounded block.
    const int clearSize = reinterpret_cast<char*>(roundedDownEnd) - reinterpret_cast<char*>(roundedUpStart);
    if(clearSize > 0)
    {
        DCZeroRange(reinterpret_cast<void*>(roundedUpStart), clearSize);
    }
    // Fill in the ends
    for(hkUint32* dst32 = actualStart; dst32 < roundedUpStart; dst32++)
    {
        *dst32 = 0;
    }
    for(hkUint32* dstEnd32 = roundedDownEnd; dstEnd32 < actualEnd; dstEnd32++)
    {
        *dstEnd32 = 0;
    }
#elif (HK_CONFIG_SIMD == HK_CONFIG_SIMD_ENABLED)
    HK_ASSERT( 0xf021d445, (hkUlong(dst) & HK_NATIVE_ALIGN_CHECK ) == 0, "Unaligned address" );
    hkVector4f zero;
    zero = hkVector4f::getZero();   // using zero.setZero(); does not work, as the compiler will replace this loop with an out of line call to memset
    hkVector4f* dstQuad = (hkVector4f*)dst;
    for (int i = numQuads-1; i>=0; i--)
    {
        *(dstQuad++) = zero;
    }
#else

#if defined(HK_ALIGN_RELAX_CHECKS) // stack allocated vars not aligned to 16 etc, yet a lot of code uses this call to init the vars. As it is only using uint32, it only has to be 4 byte aligned anyway.
    HK_ASSERT( 0xf021d445, (hkUlong(dst)   & 0x03) == 0, "Unaligned address" );
#else
    HK_ASSERT( 0xf021d445, (hkUlong(dst)   & 0xf) == 0, "Unaligned address" );
#endif
    hkUint32* dst32 = reinterpret_cast<      hkUint32*>(dst);
    {
        for (int i = 0; i < numQuads; i++)
        {
            dst32[0] = 0;
            dst32[1] = 0;
            dst32[2] = 0;
            dst32[3] = 0;
            dst32+= 4;
        }
    }
#endif
}


// Fills size bytes at dst with repeated copies of the 16-byte pattern read from src.
// size is a template constant and must be a positive multiple of 16 (checked at compile time).
//
// For sizes up to and including 512 bytes, on PlayStation(R)4, this will compile down to a
// sequence of store instructions.
// For larger sizes or other platforms, it reverts to the looped version.
template<int size>
void HK_CALL hkString::memSet16(_Out_writes_bytes_(size) void* dst, _In_reads_bytes_(16) const void* HK_RESTRICT src)
{
    HK_COMPILE_TIME_ASSERT( ((size & 0xf) == 0) && (size > 0) );

#if defined( HK_PLATFORM_PS4 )

    // The unrolled stores below cover elements 0..31, i.e. exactly 32 * 16 = 512 bytes, so the
    // bound is inclusive: a 512 byte fill is fully handled here and needs no loop.
    if (size <= 32 * 16)
    {
        // The pattern is read once, by value, before any store to dst.
        const hkQuadReal srcQuad = *reinterpret_cast<const hkQuadReal*>(src);
        hkQuadReal*      dstQuad =  reinterpret_cast<hkQuadReal*>(dst);

        // HK_SET_ELEM(X) stores element X only when size reaches past it; the test is on the
        // template constant. HK_SET_ELEM4(X) expands to elements X..X+3.
#define HK_SET_ELEM( X )  { if ( size > 16 * (X) ) dstQuad[X] = srcQuad; }
#define HK_SET_ELEM4( X )  { HK_SET_ELEM((X)+0); HK_SET_ELEM((X)+1); HK_SET_ELEM((X)+2); HK_SET_ELEM((X)+3);}

        HK_SET_ELEM4(0);
        HK_SET_ELEM4(4);
        HK_SET_ELEM4(8);
        HK_SET_ELEM4(12);

        HK_SET_ELEM4(16);
        HK_SET_ELEM4(20);
        HK_SET_ELEM4(24);
        HK_SET_ELEM4(28);

#undef HK_SET_ELEM4
#undef HK_SET_ELEM

    }
    else
#endif
    {
        // Looped version: the runtime-count overload, given size/16 quads.
        hkString::memSet16(dst, src, size/16);
    }
}

// Fills numQuads 16-byte blocks at dst with the 16-byte pattern pointed to by value.
//
//  - IA32 / X64: the pattern is read once as an hkQuadFloat32 and stored into every quad.
//  - otherwise: dst is asserted to be 4-byte aligned and the pattern is re-read from value,
//    word by word, for every quad written.
void HK_CALL hkString::memSet16(_Out_writes_bytes_(numQuads*16) void* dst, _In_reads_bytes_(16) const void* value, _In_range_(>, 0) int numQuads)
{
#if defined(HK_PLATFORM_WIIU)
    // NOTE(review): presumably a cache-line pre-zero of the rounded interior of the range ahead
    // of the fill below - confirm against the platform SDK.
    const int clearSize = OSRoundDown32B(dst + numQuads * 16) - OSRoundUp32B(dst);
    if(clearSize > 0)
    {
        DCZeroRange(reinterpret_cast<void*>(OSRoundUp32B(dst)), clearSize);
    }
#endif
#if defined( HK_ARCH_IA32) || defined(HK_ARCH_X64)
    // Snapshot the pattern before the first store.
    hkQuadFloat32 pattern = *reinterpret_cast<const hkQuadFloat32*>(value);
    hkQuadFloat32* const quads = reinterpret_cast<hkQuadFloat32*>(dst);
    for (int quadIdx = 0; quadIdx < numQuads; ++quadIdx)
    {
        quads[quadIdx] = pattern;
    }
#else

    HK_ASSERT( 0xf021d445, (hkUlong(dst) & 0x03) == 0, "Unaligned address" );

    const hkUint32* const pattern32 = reinterpret_cast<const hkUint32*>(value);
    hkUint32* out32 = reinterpret_cast<hkUint32*>(dst);
    for (int remaining = numQuads; remaining > 0; --remaining)
    {
        // Word order 0..3, with each source word read just before it is written.
        out32[0] = pattern32[0];
        out32[1] = pattern32[1];
        out32[2] = pattern32[2];
        out32[3] = pattern32[3];
        out32 += 4;
    }
#endif
}

// Compares the first n 32-bit words of buf1 and buf2 as unsigned values.
// Returns -1 or 1 according to the first pair of words that differ (buf1's word smaller or
// larger respectively), and 0 when all n words are equal or n is not positive.
HK_INLINE int HK_CALL hkString::memCmpUint32(_In_reads_(n) const hkUint32* buf1, _In_reads_(n) const hkUint32* buf2, _In_range_(>, 0) int n)
{
    for (int idx = 0; idx < n; ++idx)
    {
        const hkUint32 lhs = buf1[idx];
        const hkUint32 rhs = buf2[idx];
        if ( lhs != rhs )
        {
            return ( lhs < rhs ) ? -1 : 1;
        }
    }
    return 0;
}

// Fixed-size-array convenience overload: forwards to the hkString::vsnPrintf overload that
// takes an explicit buffer size, supplying the array extent (_size) deduced from dst.
// Returns whatever that overload returns.
template<int _size>
HK_INLINE int HK_CALL hkString::vsnPrintf(_Out_writes_z_(n) char(&dst)[_size], int n, _Printf_format_string_ const char* fmt, va_list hkargs)
{
    const int capacity = _size;
    const int result = hkString::vsnPrintf(dst, capacity, n, fmt, hkargs);
    return result;
}

// Fixed-size-array convenience overload: forwards to the hkString::strCpy overload that takes
// an explicit buffer size, supplying the array extent (_size) deduced from dst.
template<int _size>
HK_INLINE void HK_CALL hkString::strCpy(_Pre_cap_for_(src) char (&dst)[_size], _In_z_ const char* src)
{
    const int capacity = _size;
    hkString::strCpy(dst, capacity, src);
}

// Fixed-size-array convenience overload: forwards to the hkString::strNcpy overload that takes
// an explicit buffer size, supplying the array extent (_size) deduced from dst; n is passed
// through unchanged.
template<int _size>
HK_INLINE void HK_CALL hkString::strNcpy(_Out_writes_z_(n) char (&dst)[_size], _In_reads_z_(n) const char* src, int n)
{
    const int capacity = _size;
    hkString::strNcpy(dst, capacity, src, n);
}

/*
 * Havok SDK - Base file, BUILD(#20180110)
 * 
 * Confidential Information of Microsoft Corporation.
 * Not for disclosure or distribution without Microsoft's prior written
 * consent.  This software contains code, techniques and know-how which
 * is confidential and proprietary to Microsoft.  Product and Trade Secret
 * source code contains trade secrets of Microsoft.  Havok Software (C)
 * Copyright 1999-2018 Microsoft Corporation.
 * All Rights Reserved. Use of this software is subject to the
 * terms of an end user license agreement.
 * 
 * The Havok Logo, and the Havok buzzsaw logo are trademarks of Microsoft.
 * Title, ownership rights, and intellectual property rights in the Havok
 * software remain in Microsoft and/or its suppliers.
 * 
 * Use of this software for evaluation purposes is subject to and
 * indicates acceptance of the End User licence Agreement for this
 * product. A copy of the license is included with this software and is
 * also available from Havok Support.
 * 
 */
