// TKBMS v1.0 -----------------------------------------------------
//
// PLATFORM   : ALL
// PRODUCT   : COMMON
// VISIBILITY   : PUBLIC
//
// ------------------------------------------------------TKBMS v1.0

#include <Common/Base/hkBase.h> // Precompiled Header

#include <Common/Base/Algorithm/Sort/hkRadixSort.h>
#include <Common/Base/Algorithm/Collide/1AxisSweep/hk1AxisSweep.h>

// Emits the key pair for (aabb0, aabb1) into the fixed-size output buffer.
//
// If pairOut has not yet reached 'end', the two keys are written to *pairOut
// (in swapped order when 'flip' is non-zero) and the cursor advanced by one is
// returned. If the buffer is already full, nothing is written, numPairsSkipped
// is incremented and the unchanged cursor is returned.
template<int flip>
HK_INLINE hkKeyPair* hk1AxisSweep_appendPair(const hk1AxisSweep::AabbInt& aabb0, const hk1AxisSweep::AabbInt& aabb1, _Inout_ptrdiff_count_(end) hkKeyPair* HK_RESTRICT pairOut, _Notvalid_ const hkKeyPair* HK_RESTRICT end, _Out_ int& numPairsSkipped)
{
    // No room left: only record that a pair was dropped.
    if ( !(pairOut < end) )
    {
        numPairsSkipped += 1;
        return pairOut;
    }

    // Disable if condition always true/false warnings
    HK_DETAIL_DIAG_MSVC_SUPPRESS(25041 25042)
    if ( flip )
    {
        // Swapped order: the second box provides key A.
        pairOut->m_keyA = aabb1.getKey();
        pairOut->m_keyB = aabb0.getKey();
    }
    else
    {
        pairOut->m_keyA = aabb0.getKey();
        pairOut->m_keyB = aabb1.getKey();
    }

    return pairOut + 1;
}

// hkArray flavour of the pair emitter: obtains one more element from pairsOut
// via expandOne() and fills it with the keys of aabb0 and aabb1. The keys are
// stored in swapped order when 'flip' is non-zero.
template<int flip>
HK_INLINE void hk1AxisSweep_arrayAppendPair(const hk1AxisSweep::AabbInt& aabb0, const hk1AxisSweep::AabbInt& aabb1, hkArray<hkKeyPair>& pairsOut)
{
    hkKeyPair& slot = pairsOut.expandOne();

    // Disable if condition always true/false warnings
    HK_DETAIL_DIAG_MSVC_SUPPRESS(25041 25042)
    if ( flip )
    {
        // Swapped order: the second box provides key A.
        slot.m_keyA = aabb1.getKey();
        slot.m_keyB = aabb0.getKey();
    }
    else
    {
        slot.m_keyA = aabb0.getKey();
        slot.m_keyB = aabb1.getKey();
    }
}

// Walks the x-sorted list starting at sxyz and appends to pairsOut one pair for
// every entry whose min-x does not exceed the query's max-x and for which
// yzDisjoint(query, entry) returns zero.
//
// Entries are processed four at a time and all four are read before their x
// values are checked, so the list must stay readable for up to three entries
// past the one that stops the loop.
template<int flipKeys>
HK_INLINE void hk1AxisSweep_arrayScanList(const hk1AxisSweep::AabbInt& query, _In_ const hk1AxisSweep::AabbInt* HK_RESTRICT sxyz, hkArray<hkKeyPair>& pairsOut)
{
    const hkUint32 queryMaxX = query.m_max[0];

    for ( ; sxyz->m_min[0] <= queryMaxX; sxyz += 4 )
    {
        const int disjoint0 = hk1AxisSweep::AabbInt::yzDisjoint( query, sxyz[0] );
        const int disjoint1 = hk1AxisSweep::AabbInt::yzDisjoint( query, sxyz[1] );
        const int disjoint2 = hk1AxisSweep::AabbInt::yzDisjoint( query, sxyz[2] );
        const int disjoint3 = hk1AxisSweep::AabbInt::yzDisjoint( query, sxyz[3] );

        // Cheap reject: the combined (bitwise) result is non-zero, so the whole group is skipped.
        if ( (disjoint0 & disjoint1) & (disjoint2 & disjoint3) )
        {
            continue;
        }

        // Entry 0 already passed the x test in the loop condition.
        if ( !disjoint0 )
        {
            hk1AxisSweep_arrayAppendPair<flipKeys>(query, sxyz[0], pairsOut);
        }

        // Entries 1..3 still need their own x test.
        if ( !disjoint1 )
        {
            if ( sxyz[1].m_min[0] <= queryMaxX )
            {
                hk1AxisSweep_arrayAppendPair<flipKeys>(query, sxyz[1], pairsOut);
            }
        }
        if ( !disjoint2 )
        {
            if ( sxyz[2].m_min[0] <= queryMaxX )
            {
                hk1AxisSweep_arrayAppendPair<flipKeys>(query, sxyz[2], pairsOut);
            }
        }
        if ( !disjoint3 )
        {
            if ( sxyz[3].m_min[0] <= queryMaxX )
            {
                hk1AxisSweep_arrayAppendPair<flipKeys>(query, sxyz[3], pairsOut);
            }
        }
    }
}

// Fixed-buffer flavour of the list scan. Starting at sxyz, every entry whose
// min-x does not exceed the query's max-x and for which yzDisjoint(query, entry)
// returns zero is handed to hk1AxisSweep_appendPair, which either writes the
// pair or counts it in numPairsSkipped when the buffer [pairsOut, end) is full.
//
// Entries are processed four at a time and all four are read before their x
// values are checked, so the list must stay readable for up to three entries
// past the one that stops the loop.
//
// Returns the updated output cursor.
template<int flipKeys>
HK_INLINE hkKeyPair* hk1AxisSweep_scanList( const hk1AxisSweep::AabbInt& query, _In_ const hk1AxisSweep::AabbInt* HK_RESTRICT sxyz,
    _Inout_ptrdiff_count_(end) hkKeyPair* HK_RESTRICT pairsOut, _Notvalid_ const hkKeyPair* HK_RESTRICT end, _Out_ int& numPairsSkipped )
{
    const hkUint32 xLimit = query.m_max[0];

    while ( sxyz->m_min[0] <= xLimit )
    {
        const int d0 = hk1AxisSweep::AabbInt::yzDisjoint( query, sxyz[0] );
        const int d1 = hk1AxisSweep::AabbInt::yzDisjoint( query, sxyz[1] );
        const int d2 = hk1AxisSweep::AabbInt::yzDisjoint( query, sxyz[2] );
        const int d3 = hk1AxisSweep::AabbInt::yzDisjoint( query, sxyz[3] );

        // Bitwise combination of the four results; zero means at least one entry needs a closer look.
        const int allDisjoint = (d0 & d1) & (d2 & d3);
        if ( allDisjoint == 0 )
        {
            // Entry 0 already passed the x test in the loop condition.
            if ( !d0 )
            {
                pairsOut = hk1AxisSweep_appendPair<flipKeys>( query, sxyz[0], pairsOut, end, numPairsSkipped );
            }
            // Entries 1..3 still need their own x test.
            if ( !d1 && (sxyz[1].m_min[0] <= xLimit) )
            {
                pairsOut = hk1AxisSweep_appendPair<flipKeys>( query, sxyz[1], pairsOut, end, numPairsSkipped );
            }
            if ( !d2 && (sxyz[2].m_min[0] <= xLimit) )
            {
                pairsOut = hk1AxisSweep_appendPair<flipKeys>( query, sxyz[2], pairsOut, end, numPairsSkipped );
            }
            if ( !d3 && (sxyz[3].m_min[0] <= xLimit) )
            {
                pairsOut = hk1AxisSweep_appendPair<flipKeys>( query, sxyz[3], pairsOut, end, numPairsSkipped );
            }
        }

        sxyz += 4;
    }

    return pairsOut;
}


// Requires 4 elements of padding at the end
//
// Sweeps two lists, each sorted by ascending min-x, against each other and writes
// the resulting key pairs to the fixed buffer pairsOut[0 .. maxNumPairs).
// Key A of every pair comes from list A and key B from list B.
// numPairsSkipped is reset to zero and then counts the pairs that did not fit.
// Returns the number of pairs written.
_Ret_range_(0, maxNumPairs)
int HK_CALL hk1AxisSweep::collide( _In_reads_(numA + 4) const hk1AxisSweep::AabbInt* pa, _In_range_(>, 0) int numA,
    _In_reads_(numB + 4) const hk1AxisSweep::AabbInt* pb, _In_range_(>, 0) int numB,
    _Inout_count_(maxNumPairs) hkKeyPair* HK_RESTRICT pairsOut, _In_range_(>, 0) int maxNumPairs, _Out_ int& numPairsSkipped)
{
#if defined(HK_DEBUG_SLOW)
    // The counts must cover real entries only; padding entries carry a min-x of hkUint32(-1).
    HK_ASSERT(0xad8750aa, numA == 0 || pa[numA-1].m_min[0] != hkUint32(-1), "numA should not include the padding elements at the end.");
    HK_ASSERT(0xad8756aa, numB == 0 || pb[numB-1].m_min[0] != hkUint32(-1), "numB should not include the padding elements at the end.");

    // assert that the input lists are sorted
    for (int i =0 ; i < numA-1; i++)
    {
        HK_ASSERT_NO_MSG( 0xf0341232, pa[i].m_min[0] <= pa[i+1].m_min[0]);
    }
    for (int i =0 ; i < numB-1; i++)
    {
        HK_ASSERT_NO_MSG( 0xf0341233, pb[i].m_min[0] <= pb[i+1].m_min[0]);
    }

    // assert that both lists end with the four padding entries
    for (int q =0; q < 4; q++ )
    {
        HK_ASSERT(0xad8757ab, pa[numA+q].m_min[0] == hkUint32(-1), "Four max-value padding elements are required at the end.");
    }
    for (int q =0; q < 4; q++ )
    {
        HK_ASSERT(0xad8757ab, pb[numB+q].m_min[0] == hkUint32(-1), "Four max-value padding elements are required at the end.");
    }
#endif

    numPairsSkipped = 0;

    const hkKeyPair* const bufferEnd = pairsOut + maxNumPairs;
    hkKeyPair* HK_RESTRICT cursor = pairsOut;

    for (;;)
    {
        // The list whose head has the smaller min-x supplies the next query box;
        // on a tie list A goes first.
        const bool bHeadIsFirst = ( pa->m_min[0] > pb->m_min[0] );

        if ( bHeadIsFirst )
        {
            if ( numB <= 0 )
            {
                break;
            }
            --numB;

            // Query comes from B, so the keys are flipped to keep A's key in m_keyA.
            cursor = hk1AxisSweep_scanList<true>( *pb, pa, cursor, bufferEnd, numPairsSkipped );
            ++pb;
        }
        else
        {
            if ( numA <= 0 )
            {
                break;
            }
            --numA;

            cursor = hk1AxisSweep_scanList<false>( *pa, pb, cursor, bufferEnd, numPairsSkipped );
            ++pa;
        }
    }

    return int(cursor - pairsOut);
}

// Sweeps two lists, each sorted by ascending min-x, against each other and
// appends the resulting key pairs to pairsOut. Key A of every pair comes from
// list A and key B from list B.
// As the debug checks below state, both lists must be followed by four padding
// entries whose min-x is hkUint32(-1).
void HK_CALL hk1AxisSweep::collide( _In_reads_(numA) const AabbInt* pa, _In_range_(>, 0) int numA,
    _In_reads_(numB) const AabbInt* pb, _In_range_(>, 0) int numB, hkArray<hkKeyPair>& pairsOut)
{
#if defined(HK_DEBUG_SLOW)
    // The counts must cover real entries only; padding entries carry a min-x of hkUint32(-1).
    HK_ASSERT(0xad8750aa, numA == 0 || pa[numA-1].m_min[0] != hkUint32(-1), "numA should not include the padding elements at the end.");
    HK_ASSERT(0xad8756aa, numB == 0 || pb[numB-1].m_min[0] != hkUint32(-1), "numB should not include the padding elements at the end.");

    // assert that the input lists are sorted
    for (int i =0 ; i < numA-1; i++)
    {
        HK_ASSERT_NO_MSG( 0xf0341232, pa[i].m_min[0] <= pa[i+1].m_min[0]);
    }
    for (int i =0 ; i < numB-1; i++)
    {
        HK_ASSERT_NO_MSG( 0xf0341233, pb[i].m_min[0] <= pb[i+1].m_min[0]);
    }

    // assert that both lists end with the four padding entries
    for (int q =0; q < 4; q++ )
    {
        HK_ASSERT(0xad8757ab, pa[numA+q].m_min[0] == hkUint32(-1), "Four max-value padding elements are required at the end.");
    }
    for (int q =0; q < 4; q++ )
    {
        HK_ASSERT(0xad8757ab, pb[numB+q].m_min[0] == hkUint32(-1), "Four max-value padding elements are required at the end.");
    }
#endif

    for (;;)
    {
        // The list whose head has the smaller min-x supplies the next query box;
        // on a tie list A goes first.
        if ( pb->m_min[0] < pa->m_min[0] )
        {
            if ( numB <= 0 )
            {
                break;
            }
            --numB;

            // Query comes from B, so the keys are flipped to keep A's key in m_keyA.
            hk1AxisSweep_arrayScanList<true>(*pb, pa, pairsOut);
            ++pb;
        }
        else
        {
            if ( numA <= 0 )
            {
                break;
            }
            --numA;

            hk1AxisSweep_arrayScanList<false>(*pa, pb, pairsOut);
            ++pa;
        }
    }
}

// Requires 4 elements of padding at the end
//
// Sweeps a single list, sorted by ascending min-x, against itself: every entry
// except the last is used as a query against the entries that follow it.
// Resulting key pairs go to the fixed buffer pairsOut[0 .. maxNumPairs);
// numPairsSkipped is reset to zero and then counts the pairs that did not fit.
// Returns the number of pairs written.
_Ret_range_(0, maxNumPairs)
int HK_CALL hk1AxisSweep::collide( _In_reads_(numA) const hk1AxisSweep::AabbInt* pa, _In_range_(>, 0) int numA,
    _Inout_count_(maxNumPairs) hkKeyPair* HK_RESTRICT pairsOut, _In_range_(>, 0) int maxNumPairs, _Out_ int& numPairsSkipped)
{
#if defined(HK_DEBUG_SLOW)
    // The count must cover real entries only; padding entries carry a min-x of hkUint32(-1).
    HK_ASSERT(0xad8751aa, numA == 0 || pa[numA-1].m_min[0] != hkUint32(-1), "numA should not include the padding elements at the end.");

    // assert that the input lists are sorted
    for (int i =0 ; i < numA-1; i++)
    {
        HK_ASSERT_NO_MSG( 0xf0341234, pa[i].m_min[0] <= pa[i+1].m_min[0]);
    }

    // assert that the list ends with the four padding entries
    for (int q =0; q < 4; q++ )
    {
        HK_ASSERT(0xad8757ab, pa[numA+q].m_min[0] == hkUint32(-1), "Four max-value padding elements are required at the end.");
    }
#endif

    numPairsSkipped = 0;

    const hkKeyPair* const bufferEnd = pairsOut + maxNumPairs;
    hkKeyPair* HK_RESTRICT cursor = pairsOut;

    // numA-1 queries: the last entry has nothing after it to be tested against.
    for ( int remaining = numA - 1; remaining > 0; --remaining )
    {
        cursor = hk1AxisSweep_scanList<false>( pa[0], pa + 1, cursor, bufferEnd, numPairsSkipped );
        ++pa;
    }

    return int(cursor - pairsOut);
}

// Sweeps a single list, sorted by ascending min-x, against itself and appends
// the resulting key pairs to pairsOut: every entry except the last is used as a
// query against the entries that follow it.
// As the debug checks below state, the list must be followed by four padding
// entries whose min-x is hkUint32(-1).
void HK_CALL hk1AxisSweep::collide( _In_reads_(numA) const hk1AxisSweep::AabbInt* pa, _In_range_(>, 0) int numA, hkArray<hkKeyPair>& pairsOut)
{
#if defined(HK_DEBUG_SLOW)
    // The count must cover real entries only; padding entries carry a min-x of hkUint32(-1).
    HK_ASSERT(0xad8751aa, numA == 0 || pa[numA-1].m_min[0] != hkUint32(-1), "numA should not include the padding elements at the end.");

    // assert that the input lists are sorted
    for (int i =0 ; i < numA-1; i++)
    {
        HK_ASSERT_NO_MSG( 0xf0341234, pa[i].m_min[0] <= pa[i+1].m_min[0]);
    }

    // assert that the list ends with the four padding entries
    for (int q =0; q < 4; q++ )
    {
        HK_ASSERT(0xad8757ab, pa[numA+q].m_min[0] == hkUint32(-1), "Four max-value padding elements are required at the end.");
    }
#endif

    // numA-1 queries: the last entry has nothing after it to be tested against.
    for ( int remaining = numA - 1; remaining > 0; --remaining )
    {
        hk1AxisSweep_arrayScanList<false>( pa[0], pa + 1, pairsOut );
        ++pa;
    }
}

// Sorts aabbs[0 .. size) in place by ascending m_min[0], using temporary arrays
// for the radix-sort keys, the sort scratch space and the reordered boxes.
//
// The radix sort runs over 'size' rounded up to a multiple of 4, so the entries
// between size and that rounded count are read as keys as well; only the first
// 'size' sorted entries are copied back.
void HK_CALL hk1AxisSweep::sortAabbs( _Inout_updates_(HK_NEXT_MULTIPLE_OF(4, size)) hk1AxisSweep::AabbInt* aabbs, _In_range_(>, 0) int size)
{
    // Make it multiple of 4
    // This is okay cos we know the AABBs array is padded with 4 extra entries
    const int paddedCount = HK_NEXT_MULTIPLE_OF(4, size);

    // One (key, original index) record per box, padding included.
    hkArray<hkRadixSort::SortData32>::Temp sortRecords(paddedCount);
    for (int idx = 0; idx < paddedCount; ++idx)
    {
        sortRecords[idx].m_key = aabbs[idx].m_min[0];
        sortRecords[idx].m_userData = idx;
    }

    {
        // Scratch space for the sort; scoped so it goes away before the next temporary is created.
        hkArray<hkRadixSort::SortData32>::Temp scratch( paddedCount );
        hkRadixSort::sort32(sortRecords.begin(), paddedCount, scratch.begin());
    }

    // Gather the boxes in sorted order ...
    hkArray<hk1AxisSweep::AabbInt>::Temp reordered(size);
    for (int idx = 0; idx < size; ++idx)
    {
        const int sourceIndex = sortRecords[idx].m_userData;
        reordered[idx] = aabbs[sourceIndex];
    }

    // ... and copy them back over the input (the count is given in 16-byte units).
    hkString::memCpy16(aabbs, reordered.begin(), size * sizeof(hk1AxisSweep::AabbInt)/16);
}

// The sortAabbs() overload below reuses its AabbInt output buffer as radix-sort
// scratch space, which only works if an AabbInt is at least as large as a sort record.
HK_COMPILE_TIME_ASSERT(sizeof(hk1AxisSweep::AabbInt) >= sizeof(hkRadixSort::SortData32) );

// Sorts aabbs[0 .. size) in place by ascending m_min[0] using caller-provided
// work buffers instead of temporary allocations.
//
// sortArray and sortedAabbs must each hold at least 'size' rounded up to a
// multiple of 4 elements (checked by the slow asserts). Both are overwritten.
// The radix sort runs over the rounded-up count, so the entries between size
// and that count are read as keys as well; only the first 'size' sorted entries
// are copied back.
void HK_CALL hk1AxisSweep::sortAabbs( _Inout_updates_(HK_NEXT_MULTIPLE_OF(4, size)) AabbInt* aabbs, _In_range_(>, 0) int size,
    hkArrayBase<hkRadixSort::SortData32>& sortArray, hkArrayBase<AabbInt>& sortedAabbs )
{
    // Make it multiple of 4
    // This is okay cos we know the AABBs array is padded with 4 extra entries
    const int fixedSize = HK_NEXT_MULTIPLE_OF(4,size);

    HK_ASSERT_SLOW_NO_MSG(0x3bb9cae0, sortArray.getSize() >= fixedSize );
    HK_ASSERT_SLOW_NO_MSG(0x3bb9cae0, sortedAabbs.getSize() >= fixedSize );

    // One (key, original index) record per box, padding included.
    for (int idx = 0; idx < fixedSize; ++idx)
    {
        sortArray[idx].m_key = aabbs[idx].m_min[0];
        sortArray[idx].m_userData = idx;
    }

    // We need a buffer of fixedSize hkRadixSort::SortData32's
    // The sortedAabbs is at least that big, and it is not filled until after the sort.
    hkRadixSort::sort32(sortArray.begin(), fixedSize, reinterpret_cast<hkRadixSort::SortData32*>(sortedAabbs.begin()));

    // Gather the boxes in sorted order ...
    for (int idx = 0; idx < size; ++idx)
    {
        const int sourceIndex = sortArray[idx].m_userData;
        sortedAabbs[idx] = aabbs[sourceIndex];
    }

    // ... and copy them back over the input (the count is given in 16-byte units).
    hkString::memCpy16(aabbs, sortedAabbs.begin(), size * sizeof(hk1AxisSweep::AabbInt)/16);
}

// Sets up an iterator over the two lists pa (numA entries) and pb (numB entries).
//
// The slow-debug checks require each list to be followed by four padding entries
// whose min-x is hkUint32(-1), and numA / numB to exclude those padding entries.
//
// The list whose first entry has the smaller min-x (list A on a tie) provides
// the initial "current" entry; the other list provides the "potential" entries.
// next() is called once before the constructor returns.
hk1AxisSweep::IteratorAB::IteratorAB(_In_reads_(numA + 4) const hk1AxisSweep::AabbInt* pa, _In_range_(>, 0) int numA,
    _In_reads_(numB + 4) const hk1AxisSweep::AabbInt* pb, _In_range_(>, 0) int numB) :
    m_aIsBigger(pa[0].m_min[0] > pb[0].m_min[0]),
    m_currentPtr(HK_NULL), m_potentialPtr(HK_NULL),
    m_pa(pa), m_pb(pb),
    m_numA(numA), m_numB(numB)

#if defined(HK_DEBUG_SLOW)
    ,
    m_originalA(pa), m_originalB(pb),
    m_totalNumA(numA), m_totalNumB(numB)
#endif
{
    // same asserts as in hk1AxisSweep::collide
    HK_ASSERT_SLOW(0xad8750aa, numA == 0 || pa[numA - 1].m_min[0] != hkUint32(-1), "numA should not include the padding elements at the end.");
    HK_ASSERT_SLOW(0xad8756aa, numB == 0 || pb[numB - 1].m_min[0] != hkUint32(-1), "numB should not include the padding elements at the end.");

#if defined(HK_DEBUG_SLOW)
    {   for (int i = 0; i < 4; i++) { HK_ASSERT(0xad8757ab, pa[numA + i].m_min[0] == hkUint32(-1), "Four max-value padding elements are required at the end."); } }
    {   for (int i = 0; i < 4; i++) { HK_ASSERT(0xad8757ab, pb[numB + i].m_min[0] == hkUint32(-1), "Four max-value padding elements are required at the end."); } }

    // assert that the input lists are sorted
//  {   for (int i =0 ; i < numA-1; i++){ HK_ASSERT_NO_MSG( 0xf0341232, pa[i].m_min[0] <= pa[i+1].m_min[0]); }  }
//  {   for (int i =0 ; i < numB-1; i++){ HK_ASSERT_NO_MSG( 0xf0341233, pb[i].m_min[0] <= pb[i+1].m_min[0]); }  }
#endif

    // The list that starts first on x supplies the current entry; the other one is scanned against it.
    m_currentPtr = (m_aIsBigger ? m_pb : m_pa);
    m_potentialPtr = (m_aIsBigger ? m_pa : m_pb);

    m_potentialPtr--; // counteract the increment that happens at the start of next()
    next();
}

// Reinterprets a floating point value as a 32-bit unsigned integer key.
//
// The key is built from the raw 32 bits of 'in' (for double-precision hkReal,
// from word [1] on little-endian targets and word [0] otherwise). If the sign
// bit is set, all bits are inverted; otherwise only the sign bit is flipped.
// This relies on '>>' of a negative hkInt32 replicating the sign bit.
HK_INLINE hkUint32 hkRealToOrderedUint(const hkReal& in)
{
    const hkInt32* words = reinterpret_cast<const hkInt32*>(&in);

#if defined(HK_REAL_IS_DOUBLE) && HK_ENDIAN_LITTLE
    const hkInt32 rawBits = words[1];
#else
    const hkInt32 rawBits = words[0];
#endif

    // All ones when the sign bit is set, zero otherwise.
    const hkUint32 signFill = hkUint32(rawBits >> 31);

    // The sign bit is always part of the mask, so it is flipped in both cases.
    const hkUint32 flipMask = signFill | hkUint32(0x80000000);

    return flipMask ^ hkUint32(rawBits);
}



#if ((HK_CONFIG_SIMD == HK_CONFIG_SIMD_ENABLED) && defined(HK_COMPILER_HAS_INTRINSICS_IA32))

// SSE2 integer code!
#include <emmintrin.h>

// 16-byte aligned per-lane constants used by the SSE2 version of AabbInt::set() below:
// the sign bit of every 32-bit lane, and the value 1 in every 32-bit lane.
HK_ALIGN16( static hkUint32 simdSignBit[4]) = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
HK_ALIGN16( static hkUint32 simdOne[4]) =     { 0x00000001, 0x00000001, 0x00000001, 0x00000001 };

// SSE2 version: fills this integer AABB from the floating point box aabbIn and stores 'key'.
//
// All four 32-bit lanes of min and max are processed together:
//  - in double-precision builds the values are first converted to single precision,
//  - each lane's float bits are remapped with xor(or(lane >> 31 (arithmetic), signBit), lane),
//  - the result is shifted right by one bit, and max additionally gets +1,
//  - the top 32 bits of the min register are then overwritten with 'key'.
// The two registers are written with aligned stores to &m_min and &m_max.
// The top lane of max keeps the remapped 4th component of aabbIn.m_max.
void hk1AxisSweep::AabbInt::set( const hkAabb& aabbIn, int key )
{
    const __m128i signBit = _mm_load_si128( (const __m128i*)simdSignBit);
    const __m128i one = _mm_load_si128( (const __m128i*)simdOne);

    // Fetch the min corner as four 32-bit float patterns.
#if defined(HK_REAL_IS_DOUBLE)
#if HK_SSE_VERSION >= 0x50
    // AVX: convert all four doubles to floats in one step.
    __m128 minXYZW = _mm256_cvtpd_ps(aabbIn.m_min.m_quad);
    __m128i min = _mm_castps_si128(minXYZW);
#else
    // SSE2: convert the xy and zw halves separately, then merge their low two floats into one register.
    __m128 minXY = _mm_cvtpd_ps(aabbIn.m_min.m_quad.xy);
    __m128 minZW = _mm_cvtpd_ps(aabbIn.m_min.m_quad.zw);
    __m128 minXYZW = _mm_shuffle_ps(minXY,minZW,_MM_SHUFFLE(1,0,1,0));
    __m128i min = _mm_castps_si128(minXYZW);
#endif
#else
    // Single precision: load the bits directly (aligned load).
    __m128i min = _mm_load_si128( (const __m128i*)&aabbIn.m_min);
#endif
    // Lanes with the sign bit set get all bits inverted, the others only get the sign bit flipped.
    min = _mm_xor_si128(_mm_or_si128(_mm_srai_epi32(min, 31), signBit), min);

    // Same sequence for the max corner.
#if defined(HK_REAL_IS_DOUBLE)
#if HK_SSE_VERSION >= 0x50
    __m128 maxXYZW = _mm256_cvtpd_ps(aabbIn.m_max.m_quad);
    __m128i max = _mm_castps_si128(maxXYZW);
#else
    __m128 maxXY = _mm_cvtpd_ps(aabbIn.m_max.m_quad.xy);
    __m128 maxZW = _mm_cvtpd_ps(aabbIn.m_max.m_quad.zw);
    __m128 maxXYZW = _mm_shuffle_ps(maxXY,maxZW,_MM_SHUFFLE(1,0,1,0));
    __m128i max = _mm_castps_si128(maxXYZW);
#endif
#else
    __m128i max = _mm_load_si128( (const __m128i*)&aabbIn.m_max);
#endif
    max = _mm_xor_si128(_mm_or_si128(_mm_srai_epi32(max, 31), signBit), max);

    // Shift down
    // (logical shift by one bit per lane; max is then bumped by one)
    min = _mm_srli_epi32(min, 1);
    max = _mm_add_epi32(_mm_srli_epi32(max, 1), one);

    // Set the key
    // (written as two 16-bit halves into 16-bit lanes 6 and 7, i.e. the top 32 bits of min)
    min = _mm_insert_epi16(min, key, 6);
    min = _mm_insert_epi16(min, hkUint32(key) >> 16, 7);

    // Store the result
    _mm_store_si128((__m128i*) &m_min, min);
    _mm_store_si128((__m128i*) &m_max, max);
}

#else

void hk1AxisSweep::AabbInt::set( const hkAabb& aabbIn, int key )
{
    // I need the shift because the max Uint allowed is 0x7fffffff
    m_min[0] = hkRealToOrderedUint(aabbIn.m_min(0)) >> 1;
    m_min[1] = hkRealToOrderedUint(aabbIn.m_min(1)) >> 1;
    m_min[2] = hkRealToOrderedUint(aabbIn.m_min(2)) >> 1;
    getKey() = key;

    // I add one to make sure all have volume.
    m_max[0] = (hkRealToOrderedUint(aabbIn.m_max(0)) >> 1) + 1;
    m_max[1] = (hkRealToOrderedUint(aabbIn.m_max(1)) >> 1) + 1;
    m_max[2] = (hkRealToOrderedUint(aabbIn.m_max(2)) >> 1) + 1;
}

#endif

/*
 * Havok SDK - Base file, BUILD(#20180110)
 * 
 * Confidential Information of Microsoft Corporation.
 * Not for disclosure or distribution without Microsoft's prior written
 * consent.  This software contains code, techniques and know-how which
 * is confidential and proprietary to Microsoft.  Product and Trade Secret
 * source code contains trade secrets of Microsoft.  Havok Software (C)
 * Copyright 1999-2018 Microsoft Corporation.
 * All Rights Reserved. Use of this software is subject to the
 * terms of an end user license agreement.
 * 
 * The Havok Logo, and the Havok buzzsaw logo are trademarks of Microsoft.
 * Title, ownership rights, and intellectual property rights in the Havok
 * software remain in Microsoft and/or its suppliers.
 * 
 * Use of this software for evaluation purposes is subject to and
 * indicates acceptance of the End User licence Agreement for this
 * product. A copy of the license is included with this software and is
 * also available from Havok Support.
 * 
 */
