// TKBMS v1.0 -----------------------------------------------------
//
// PLATFORM   : ALL
// PRODUCT   : COMMON
// VISIBILITY   : PUBLIC
//
// ------------------------------------------------------TKBMS v1.0
#include <Common/Base/hkBase.h>
#include <Common/Base/Math/Vector/hkPackedVector3.h>
#include <Common/Base/Math/Vector/hkPackedVector3.inl>

// Mask selecting the 8 exponent bits (bits 23..30) of an IEEE-754 single-precision bit pattern.
// hkPackedVector4_6::_pack ANDs it with the float bit representation to strip sign and mantissa.
const HK_ALIGN16( hkUint32 hkPackedVector3_exponentMask[4] ) =   { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 };

// Integer offsets added to a float's bit pattern. 0x4e800000 has 0x9d (157) in the exponent field and a
// zero mantissa, i.e. it is the bit pattern of 2^30. hkPackedVector4_6::_pack adds
// hkPackedVector3_8_offsetCst and then subtracts the maximum exponent, which rescales the value
// relative to that maximum. hkPackedVector3_offsetCst holds the same value and is not referenced in this
// file -- presumably it is used by the inline pack routines; verify in hkPackedVector3.inl.
const HK_ALIGN16( hkUint32 hkPackedVector3_offsetCst[4] )    =   { 0x4e800000, 0x4e800000, 0x4e800000, 0x4e800000 };
const HK_ALIGN16( hkUint32 hkPackedVector3_8_offsetCst[4] )  =   { 0x4e800000, 0x4e800000, 0x4e800000, 0x4e800000 };

// Bit patterns of floats slightly greater than 1.0 (exponent field 0x7f, small non-zero mantissa).
// hkPackedVector4_6::_pack multiplies the input by hkPackedVector4_3_rounding before extracting the
// maximum exponent so that the later rounding cannot overflow. The other two are not referenced in
// this file -- presumably the 16 bit and 8 bit packers use them the same way; TODO confirm.
const HK_ALIGN16( hkUint32 hkPackedVector3_rounding[4] )     = { 0x3F800089, 0x3F800089, 0x3F800089, 0x3F800089 };
const HK_ALIGN16( hkUint32 hkPackedVector8_3_rounding[4] )   = { 0x3F808889, 0x3F808889, 0x3F808889, 0x3F808889 };
const HK_ALIGN16( hkUint32 hkPackedVector4_3_rounding[4] )   = { 0x3F888889, 0x3F888889, 0x3F888889, 0x3F888889 };

// Integers added to the converted result as a rounding correction. The x, y and z lanes have a single
// bit set (bit 15, bit 23 and bit 27 respectively); the w lane holds a different value.
// NOTE(review): the xyz values look like half of the least significant bit that survives when only the
// top 16 / 8 / 4 bits of each lane are kept -- confirm. Only the 4_3 variant is referenced in this file.
const HK_ALIGN16( hkUint32 hkPackedVector3_roundingCorrectionCst[4] )   = { 0x00008000, 0x00008000, 0x00008000, 0x3F800000 };
const HK_ALIGN16( hkUint32 hkPackedVector8_3_roundingCorrectionCst[4] ) = { 0x00800000, 0x00800000, 0x00800000, 0x3F800000*2 };
const HK_ALIGN16( hkUint32 hkPackedVector4_3_roundingCorrectionCst[4] ) = { 0x08000000, 0x08000000, 0x08000000, 0x3F800000*2 };

// hkPackedUnitVector_m_offset has only bit 31 set in every lane (not referenced in this file).
// hkPackedVector4_6_v0Mask keeps the top four bits of every 32 bit lane; hkPackedVector4_6::_pack uses
// it to extract the nibble that is stored per component.
const HK_ALIGN16( hkUint32 hkPackedUnitVector_m_offset[4] )             = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
const HK_ALIGN16( hkUint32 hkPackedVector4_6_v0Mask[4])                 = { 0xf0000000, 0xf0000000, 0xf0000000, 0xf0000000 };

// Every bit except bit 31 set in each lane; XOR-ed into sign-extended values by
// hkPackedVector4_6::_pack when an exponent exceeds the representable maximum.
const HK_ALIGN16( hkUint32 hkPackedVector4_6_valueBits[4])              = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff };

// Expands to a brace initializer that replicates X into all four lanes of a 4-float array.
#define HK_QUADFLOAT_SINGLE(X) { X,X,X,X }
// Splatted float constants for hkPackedUnitVector8. They are not referenced in this file;
// presumably the inline pack/unpack code scales by packFactor / unpackFactor and biases by
// offset -- verify against the hkPackedUnitVector implementation.
HK_ALIGN16(const float hkPackedUnitVector8_offset[4])       = HK_QUADFLOAT_SINGLE(127.5f);
HK_ALIGN16(const float hkPackedUnitVector8_packFactor[4])   = HK_QUADFLOAT_SINGLE(127.0f);
HK_ALIGN16(const float hkPackedUnitVector8_unpackFactor[4]) = HK_QUADFLOAT_SINGLE(1.0f/127.0f);


// Byte permutation tables, one entry per output byte, each entry being a source byte index
// into a 16 byte vector. hkPackedVector3_PermuteMask gathers two bytes out of every 4 byte lane
// and hkPackedVector8_3_PermuteMask gathers one byte out of every lane; the gathered pattern is
// repeated to fill all 16 output bytes. The indices differ per endianness so that the same
// (most significant) part of each 32 bit lane is selected on both kinds of platform.
#if HK_ENDIAN_LITTLE == 1   // intel
// Little endian: the most significant bytes of a lane live at byte offsets 2,3 (16 bit) / 3 (8 bit).
const HK_ALIGN16( hkUchar hkPackedVector3_PermuteMask[16] )  = {    2,3,6,7,    10,11,14,15,    2,3,6,7,    10,11,14,15 };
const HK_ALIGN16( hkUchar hkPackedVector8_3_PermuteMask[16] ) = {   3,7,11,15,  3,7,11,15,  3,7,11,15,  3,7,11,15 };
#else   // ppc etc
// Big endian: the most significant bytes of a lane live at byte offsets 0,1 (16 bit) / 0 (8 bit).
const HK_ALIGN16( hkUchar hkPackedVector3_PermuteMask[16] )   = {   0,1,4,5,    8,9,12,13,  0,1,4,5,    8,9,12,13 };
const HK_ALIGN16( hkUchar hkPackedVector8_3_PermuteMask[16] ) = {   0,4,8,12,   0,4,8,12,   0,4,8,12,   0,4,8,12 };
#endif

/// Out-of-line entry point for packing a single-precision vector.
/// Simply forwards \a vIn to _pack().
void hkPackedVector3::pack( hkVector4fParameter vIn )
{
    _pack( vIn );
}

/// Packs a double-precision vector.
/// The four components of \a vIn are first loaded into a single-precision vector,
/// which is then handed to _pack().
void hkPackedVector3::pack( hkVector4dParameter vIn )
{
    hkVector4f narrowed;

    // The load flavour depends on the build's hkReal type.
#ifndef HK_REAL_IS_DOUBLE
    narrowed.load<4>( &vIn(0) );
#else
    narrowed.load<4, HK_IO_NATIVE_ALIGNED>( &vIn(0) );
#endif

    _pack( narrowed );
}

/// Out-of-line entry point for packing a single-precision vector.
/// Simply forwards \a vIn to _pack().
void hkPackedVector8_3::pack( hkVector4fParameter vIn )
{
    _pack( vIn );
}


/// Packs a double-precision vector.
/// The four components of \a vIn are first loaded into a single-precision vector,
/// which is then handed to _pack().
void hkPackedVector8_3::pack( hkVector4dParameter vIn )
{
    hkVector4f narrowed;
    narrowed.load<4>( &vIn(0) );

    _pack( narrowed );
}


/// Packs the xyz components of two single-precision vectors into the four bytes of m_values.
///
/// Bytes 0..2 each hold one component pair: the upper nibble is the top four bits of the
/// scaled and rounded integer form of the v0 component, the lower nibble the same for v1.
/// Byte 3 is a shared exponent byte: its upper \a numBitsExp0 bits store the exponent of v0
/// (made non-negative by subtracting minExp0), and its lower 8 - numBitsExp0 bits store the
/// exponent of v1 relative to that of v0 (made non-negative by adding EXP1_MAX_DECREMENT).
///
/// \tparam numBitsExp0  Number of bits of the exponent byte used for the exponent of v0.
/// \param  v0           First vector; only xyz are packed.
/// \param  v1           Second vector; only xyz are packed.
template <int numBitsExp0>
void hkPackedVector4_6::_pack( hkVector4f_ v0, hkVector4f_ v1 )
{

    //
    // Get the maximum absolute value of each vector and remove any mantissa bits,
    // leaving only the (biased) exponent of the largest component.
    //
    hkIntVector iMa0;
    hkIntVector iMa1;
    {
        // We set the .w component to a very tiny number, so that our horizontal max always
        // returns a 'normal' number, even if the input is zero.
        hkVector4f w0; w0.setXYZ_W( v0, hkSimdFloat32_EpsSqrd );
        hkVector4f w1; w1.setXYZ_W( v1, hkSimdFloat32_EpsSqrd );

        //
        // We need to increase the max by the rounding done later to avoid an overflow.
        //
        hkVector4f rounding; rounding.load<4>( reinterpret_cast<const hkFloat32*>(hkPackedVector4_3_rounding));
        hkVector4f rounded0 = rounding * w0;
        hkVector4f rounded1 = rounding * w1;

        hkIntVector mask; mask.load<4>(hkPackedVector3_exponentMask);

        iMa0.loadAsFloat32BitRepresentation(rounded0);
        iMa1.loadAsFloat32BitRepresentation(rounded1);
        iMa0.setAnd(iMa0, mask);
        iMa1.setAnd(iMa1, mask);
        hkPackedVector_setHorizontalMax( iMa0 );
        hkPackedVector_setHorizontalMax( iMa1 );
    }

    //
    // Divide by the maximum exponent. This is done with integer arithmetic on the float bit
    // patterns: adding the offset and subtracting the maximum exponent rescales each value
    // relative to the largest component of its vector.
    //
    hkIntVector iv0;
    hkIntVector iv1;
    {
        iv0.loadAsFloat32BitRepresentation(v0);
        iv1.loadAsFloat32BitRepresentation(v1);
        hkIntVector offset; offset.load<4>(hkPackedVector3_8_offsetCst);

        iv0.setAddU32( iv0, offset );
        iv1.setAddU32( iv1, offset );
        iv0.setSubU32( iv0, iMa0 );
        iv1.setSubU32( iv1, iMa1 );
    }

    // Extract the exponents as plain signed integers: shift the exponent field down
    // and remove the IEEE-754 single-precision bias (0x7f).
    int exp0 = iMa0.getComponent<0>();
    int exp1 = iMa1.getComponent<0>();
    exp0 >>= 23;
    exp1 >>= 23;
    exp0 -= 0x7f;
    exp1 -= 0x7f;

    // Use hkVector4f explicitly: the inputs and every other float vector in this function are
    // single precision, independent of how hkVector4 is configured.
    hkVector4f correctedV0; iv0.storeAsFloat32BitRepresentation( correctedV0 );
    hkVector4f correctedV1; iv1.storeAsFloat32BitRepresentation( correctedV1 );

    //
    // Convert to integer
    //
    hkIntVector result0; result0.setConvertF32toS32( correctedV0 );
    hkIntVector result1; result1.setConvertF32toS32( correctedV1 );

    //
    //  Rounding correction
    //
    hkIntVector roundingCorrection; roundingCorrection.load<4>(hkPackedVector4_3_roundingCorrectionCst);
    result0.setAddU32( result0, roundingCorrection);
    result1.setAddU32( result1, roundingCorrection);

    //
    // Now we need to make sure both exponents fit their bit budget and do not differ too much.
    //
    // First clip v0 and v1 against the maximum exponent. The valid range of exp0 is
    // [minExp0, maxExp0], which spans exactly (1<<numBitsExp0) values.
    //
    const int maxExp0 = (1<<(numBitsExp0-1));
    const int minExp0 = maxExp0-((1<<numBitsExp0)-1);
    {
        if ( exp0 > maxExp0 )
        {
            // Saturate: replicate the sign bit into the upper bits, then flip all value bits.
            // Only the top four bits of each lane are kept below, so they end up at the largest
            // magnitude of the matching sign.
            result0.setShiftRightS32<8>( result0 );
            result0.setXor( result0, *(const hkIntVector*)hkPackedVector4_6_valueBits );
            exp0 = maxExp0;
        }

        if ( exp1 > maxExp0 )
        {
            // Same saturation for v1.
            result1.setShiftRightS32<8>( result1 );
            result1.setXor( result1, *(const hkIntVector*)hkPackedVector4_6_valueBits );
            exp1 = maxExp0;
        }
    }

    // How much smaller exp1 can be compared to exp0. Together with EXP1_MAX_INCREMENT this
    // covers exactly the (1<<numBitsExp1) values that fit into the relative exponent field.
    const int numBitsExp1 = 8 - numBitsExp0;
    const int EXP1_MAX_DECREMENT = (1<<numBitsExp1) - (1 + EXP1_MAX_INCREMENT);

    // If v0 is very small, make v0 a denormal: exp1 cannot be much bigger than exp0, and exp0
    // cannot go below minExp0, so raise exp0 to the larger of those two limits and shift the
    // value down accordingly.
    // NOTE(review): diff is not bounded to 31 here; confirm that setShiftRightS32 with a runtime
    // count tolerates larger shift counts on all platforms.
    const int minRel0 = exp1-EXP1_MAX_INCREMENT;
    const int m0 = hkMath::max2( minExp0, minRel0 );
    if ( exp0 < m0 )
    {
        int diff = m0 - exp0;
        result0.setShiftRightS32(result0, diff);
        exp0 += diff;   // this brings exp0 to m0
    }

    // Likewise, exp1 cannot be much smaller than exp0.
    const int minExp1 = exp0 - EXP1_MAX_DECREMENT;
    if ( exp1 < minExp1 )
    {
        int diff = minExp1 - exp1;
        result1.setShiftRightS32(result1, diff);
        exp1 += diff;   // this brings exp1 to minExp1
    }

    exp1 -= exp0;               // store exp1 relative to exp0
    exp1 += EXP1_MAX_DECREMENT; // bring it into positive space
    exp0 -= minExp0;            // bring it into positive space

    // Both fields must fit their bit budget; exp1 == (1<<numBitsExp1) would spill into exp0's bits.
    HK_ASSERT_NO_MSG( 0xf04565fe, exp0 >=0 && exp0 < (1<<numBitsExp0) && exp1 >=0 && exp1 < (1<<numBitsExp1));

    int exponent = (exp0 << numBitsExp1) | ( exp1 );

    //
    //  Merge values: keep the top nibble of every lane, with v0 in bits 28..31 and v1 in bits 24..27.
    //
    result0.setAnd( result0, *(const hkIntVector*)hkPackedVector4_6_v0Mask );
    result1.setAnd( result1, *(const hkIntVector*)hkPackedVector4_6_v0Mask );
    result1.setShiftRight32<4>( result1 );
    hkIntVector result; result.setOr( result0, result1 );

    // Store the most significant byte of the x, y and z lanes into m_values.
#if !defined(HK_INT_VECTOR_NATIVE_PERMUTE8)
#   if HK_ENDIAN_LITTLE == 1
        const int endianOffset8  = 3;
#   else
        const int endianOffset8 = 0;
#   endif
    m_values.m_u8[0] = result.getU8<0+endianOffset8>();
    m_values.m_u8[1] = result.getU8<4+endianOffset8>();
    m_values.m_u8[2] = result.getU8<8+endianOffset8>();
#else
    result.setPermuteU8( result, (hkIntVector&)hkPackedVector8_3_PermuteMask );
    result.store<1>( &m_values.m_u32 );
#endif
    // Store the exponent after writing the values: the 32 bit store above also writes byte 3.
    m_values.m_u8[3] = (hkUchar)exponent;
}


/// Packs two single-precision vectors by instantiating _pack() with numBitsExp0 = 5.
void hkPackedVector4_6::pack5( hkVector4f_ v0, hkVector4f_ v1 )
{
    _pack<5>( v0, v1 );
}

/// Double-precision overload of pack5().
/// Both inputs are copied into single-precision vectors, which are then
/// passed on to the single-precision pack5().
void hkPackedVector4_6::pack5( hkVector4d_ v0, hkVector4d_ v1 )
{
    hkVector4f first;
    hkVector4f second;

    first.set( v0 );
    second.set( v1 );

    pack5( first, second );
}

/// Double-precision flavour of unpacking with a 5 bit exponent for v0.
/// The data is unpacked into single-precision temporaries via unpack<5>(),
/// which are then copied into the output vectors.
void hkPackedVector4_6::unpack5( hkVector4d& v0, hkVector4d& v1 ) const
{
    hkVector4f first;
    hkVector4f second;
    unpack<5>( first, second );

    v0.set( first );
    v1.set( second );
}

/*
 * Havok SDK - Base file, BUILD(#20180110)
 * 
 * Confidential Information of Microsoft Corporation.
 * Not for disclosure or distribution without Microsoft's prior written
 * consent.  This software contains code, techniques and know-how which
 * is confidential and proprietary to Microsoft.  Product and Trade Secret
 * source code contains trade secrets of Microsoft.  Havok Software (C)
 * Copyright 1999-2018 Microsoft Corporation.
 * All Rights Reserved. Use of this software is subject to the
 * terms of an end user license agreement.
 * 
 * The Havok Logo, and the Havok buzzsaw logo are trademarks of Microsoft.
 * Title, ownership rights, and intellectual property rights in the Havok
 * software remain in Microsoft and/or its suppliers.
 * 
 * Use of this software for evaluation purposes is subject to and
 * indicates acceptance of the End User licence Agreement for this
 * product. A copy of the license is included with this software and is
 * also available from Havok Support.
 * 
 */
