// TKBMS v1.0 -----------------------------------------------------
//
// PLATFORM   : ALL
// PRODUCT   : COMMON
// VISIBILITY   : PUBLIC
//
// ------------------------------------------------------TKBMS v1.0


#if ((defined(HK_PLATFORM_LINUX) || defined(HK_PLATFORM_WIN32) || defined(HK_PLATFORM_PS4)) && (HK_CONFIG_SIMD == HK_CONFIG_SIMD_ENABLED) && defined(HK_COMPILER_HAS_INTRINSICS_IA32))
#   define ALL_SIMD_SSE_PLATFORMS 1
#else
#   define ALL_SIMD_SSE_PLATFORMS 0
#endif


//
//  Computes a 4x4 matrix that allows you to compute any product of the form (this * q) by multiplying (opOut * q)
//  Let:
//      this    = (pV, pS)
//      q       = (qV, qS)
//  Then:
//      this * q    =   (pV * qS + pS * qV + hat(pV) * qV, pS * qS - dot(pV, qV)) = opOut * q
//      opOut       =   | (pS * Identity(3) + hat(pV))      pV  |
//                      | -Transpose(pV)                    pS  |
//  If:
//      this    = (x, y, z, w), pS = w, pV = (x, y, z)
//      opOut   =   |    w      -z       y      x   |
//                  |    z       w      -x      y   |
//                  |   -y       x       w      z   |
//                  |   -x      -y      -z      w   |

template <typename FT>
HK_INLINE void HK_CALL hkQuaternionUtilImpl<FT>::_computePostMultiplyOperator(typename hkFloatTypes<FT>::Quaternion_ q, typename hkFloatTypes<FT>::Matrix4& opOut)
{
    // Shuffle the quantities
    typename hkFloatTypes<FT>::Vector c0;   c0.template setPermutation<hkVectorPermutation::WZYX>(q.m_vec); // c0 = (w, z, y, x)
    typename hkFloatTypes<FT>::Vector c1;   c1.template setPermutation<hkVectorPermutation::ZWXY>(q.m_vec); // c1 = (z, w, x, y)
    typename hkFloatTypes<FT>::Vector c2;   c2.template setPermutation<hkVectorPermutation::YXWZ>(q.m_vec); // c2 = (y, x, w, z)

    typename hkFloatTypes<FT>::Comparison s0;   s0.template set<hkVector4ComparisonMask::MASK_ZW>();            // s0 = (1, 1, -1, -1)
    typename hkFloatTypes<FT>::Comparison s1;   s1.template set<hkVector4ComparisonMask::MASK_XW>();            // s1 = (-1, 1, 1, -1)
    typename hkFloatTypes<FT>::Comparison s2;   s2.template set<hkVector4ComparisonMask::MASK_YW>();            // s2 = (1, -1, 1, -1)

    // Set-up the output matrix
    c0.setFlipSign(c0, s0);     //  w  z -y -x
    c1.setFlipSign(c1, s1);     // -z  w  x -y
    c2.setFlipSign(c2, s2);     //  y -x  w -z

    opOut.setCols(c0, c1, c2, q.m_vec);
}

//
//  Computes a 4x4 matrix that allows you to compute any product of the form (q * this) by multiplying (opOut * q)
//  Let:
//      this    = (pV, pS)
//      q       = (qV, qS)
//  Then:
//      q * this    =   (pV * qS + pS * qV - hat(pV) * qV, pS * qS - dot(pV, qV)) = opOut * q
//      opOut       =   | (pS * Identity(3) - hat(pV))      pV  |
//                      | -Transpose(pV)                    pS  |
//  If:
//      this    = (x, y, z, w), pS = w, pV = (x, y, z)
//      opOut   =   |    w       z      -y      x   |
//                  |   -z       w       x      y   |
//                  |    y      -x       w      z   |
//                  |   -x      -y      -z      w   |

template <typename FT>
HK_INLINE void HK_CALL hkQuaternionUtilImpl<FT>::_computePreMultiplyOperator(typename hkFloatTypes<FT>::Quaternion_ q, typename hkFloatTypes<FT>::Matrix4& opOut)
{
    // Shuffle the quantities
    typename hkFloatTypes<FT>::Vector c0;   c0.template setPermutation<hkVectorPermutation::WZYX>(q.m_vec); // c0 = (w, z, y, x)
    typename hkFloatTypes<FT>::Vector c1;   c1.template setPermutation<hkVectorPermutation::ZWXY>(q.m_vec); // c1 = (z, w, x, y)
    typename hkFloatTypes<FT>::Vector c2;   c2.template setPermutation<hkVectorPermutation::YXWZ>(q.m_vec); // c2 = (y, x, w, z)

    typename hkFloatTypes<FT>::Comparison s0;   s0.template set<hkVector4ComparisonMask::MASK_YW>();            // s0 = (1, -1, 1, -1)
    typename hkFloatTypes<FT>::Comparison s1;   s1.template set<hkVector4ComparisonMask::MASK_ZW>();            // s1 = (1, 1, -1, -1)
    typename hkFloatTypes<FT>::Comparison s2;   s2.template set<hkVector4ComparisonMask::MASK_XW>();            // s2 = (-1, 1, 1, -1)

    // Set-up the output matrix
    c0.setFlipSign(c0, s0); //  w -z  y -x
    c1.setFlipSign(c1, s1); //  z  w -x -y
    c2.setFlipSign(c2, s2); // -y  x  w -z

    opOut.setCols(c0, c1, c2, q.m_vec);
}

//
//  Computes a 4x4 matrix that allows you to compute any product of the form (Inverse(this) * q) by multiplying (opOut * q)
//  If:
//      this    = (x, y, z, w), pS = w, pV = (x, y, z)
//      opOut   =   |    w       z      -y      -x  |
//                  |   -z       w       x      -y  |
//                  |    y      -x       w      -z  |
//                  |    x       y       z       w  |

template <typename FT>
HK_INLINE void HK_CALL hkQuaternionUtilImpl<FT>::_computeInversePostMultiplyOperator(typename hkFloatTypes<FT>::Quaternion_ q, typename hkFloatTypes<FT>::Matrix4& opOut)
{
    // Shuffle the quantities
    typename hkFloatTypes<FT>::Vector c0;   c0.template setPermutation<hkVectorPermutation::WZYX>(q.m_vec); // c0 = (w, z, y, x)
    typename hkFloatTypes<FT>::Vector c1;   c1.template setPermutation<hkVectorPermutation::ZWXY>(q.m_vec); // c1 = (z, w, x, y)
    typename hkFloatTypes<FT>::Vector c2;   c2.template setPermutation<hkVectorPermutation::YXWZ>(q.m_vec); // c2 = (y, x, w, z)
    typename hkFloatTypes<FT>::Vector c3        = q.m_vec;                                                      // c3 = (x, y, z, w)

    typename hkFloatTypes<FT>::Comparison s0;   s0.template set<hkVector4ComparisonMask::MASK_Y>();         // s0 = (1, -1, 1, 1)
    typename hkFloatTypes<FT>::Comparison s1;   s1.template set<hkVector4ComparisonMask::MASK_Z>();         // s1 = (1, 1, -1, 1)

    // Set-up the output matrix
    c0.setFlipSign(c0, s0);             //  w -z  y  x
    c1.setFlipSign(c1, s1);             //  z  w -x  y
    c2.template setNeg<1>(c2);          // -y  x  w  z
    c3.template setNeg<3>(c3);          // -x -y -z  w

    opOut.setCols(c0, c1, c2, c3);
}

//
//  Computes a 4x4 matrix that allows you to compute any product of the form (q * Inverse(this)) by multiplying (opOut * q)
//  If:
//      this    = (x, y, z, w), pS = w, pV = (x, y, z)
//      opOut   =   |    w      -z       y      -x  |
//                  |    z       w      -x      -y  |
//                  |   -y       x       w      -z  |
//                  |    x       y       z       w  |

template <typename FT>
HK_INLINE void HK_CALL hkQuaternionUtilImpl<FT>::_computeInversePreMultiplyOperator(typename hkFloatTypes<FT>::Quaternion_ q, typename hkFloatTypes<FT>::Matrix4& opOut)
{
    typename hkFloatTypes<FT>::Vector c0;   c0.template setPermutation<hkVectorPermutation::WZYX>(q.m_vec); // c0 = (w, z, y, x)
    typename hkFloatTypes<FT>::Vector c1;   c1.template setPermutation<hkVectorPermutation::ZWXY>(q.m_vec); // c1 = (z, w, x, y)
    typename hkFloatTypes<FT>::Vector c2;   c2.template setPermutation<hkVectorPermutation::YXWZ>(q.m_vec); // c2 = (y, x, w, z)
    typename hkFloatTypes<FT>::Vector c3        = q.m_vec;                                                      // c3 = (x, y, z, w)

    typename hkFloatTypes<FT>::Comparison s0;   s0.template set<hkVector4ComparisonMask::MASK_Z>();         // s0 = (1,  1, -1, 1)
    typename hkFloatTypes<FT>::Comparison s2;   s2.template set<hkVector4ComparisonMask::MASK_Y>();         // s2 = (1, -1,  1, 1)

    // Set-up the output matrix
    c0.setFlipSign(c0, s0);             //  w  z -y  x
    c1.template setNeg<1>(c1);          // -z  w  x  y
    c2.setFlipSign(c2, s2);             //  y -x  w  z
    c3.template setNeg<3>(c3);          // -x -y -z  w

    opOut.setCols(c0, c1, c2, c3);
}

//
//  Sets this = Transpose(op) * q, where op is a 4x4 matrix operator and q is a quaternion interpreted as a 4-element vector.

template <typename FT>
HK_INLINE void HK_CALL hkQuaternionUtilImpl<FT>::_computeTransposeMul(const typename hkFloatTypes<FT>::Matrix4& op, typename hkFloatTypes<FT>::Quaternion_ qIn, typename hkFloatTypes<FT>::Quaternion& qOut)
{
    hkVector4UtilImpl<FT>::dot4_1vs4(qIn.m_vec, op.template getColumn<0>(), op.template getColumn<1>(), op.template getColumn<2>(), op.template getColumn<3>(), qOut.m_vec);
}

//
//  N-lerp

template <typename FT>
HK_INLINE void HK_CALL hkQuaternionUtilImpl<FT>::_computeNlerp(typename hkFloatTypes<FT>::Quaternion_ q0, typename hkFloatTypes<FT>::Quaternion_ q1, typename hkFloatTypes<FT>::Scalar_ t, typename hkFloatTypes<FT>::Quaternion& qOut)
{
    typename hkFloatTypes<FT>::Vector v;
    v.setInterpolate(q0.m_vec, q1.m_vec, t);
    v.template normalize<4>();
    qOut.m_vec = v;
}

#if ALL_SIMD_SSE_PLATFORMS
static HK_INLINE hkSingleFloat32 __sin(const hkSingleFloat32& r)
{
    return hkMath::quadSin(r);
}
static HK_INLINE hkSingleDouble64 __sin(const hkSingleDouble64& r)
{
    return hkMath::twoSin(r);
}
#endif
//
//  Computes the logarithm of a quaternion

template <typename FT>
HK_INLINE void HK_CALL hkQuaternionUtilImpl<FT>::_computeLog(typename hkFloatTypes<FT>::Quaternion_ q, typename hkFloatTypes<FT>::Vector& vOut)
{
    // Get axis and cos(angle)
    const typename hkFloatTypes<FT>::Scalar cosA = q.getRealPart();
    vOut = q.getImag();

    // If the angle is not zero, return angle * axis,
    // otherwise return just axis, because axis = sin(angle) * direction
    // and for very small angles, the angle / sin(angle) -> 1.
    typename hkFloatTypes<FT>::Scalar absCosA; absCosA.setAbs(cosA);
    if ( absCosA.isLess(hkFloatTypes<FT>::Scalar::template getConstant<HK_QUADREAL_1>()) )
    {
        typename hkFloatTypes<FT>::Scalar a,sA;
        const typename hkFloatTypes<FT>::Scalar qReal = q.getRealPart(); // use indep cosA
        a = hkVector4UtilImpl<FT>::aCos(qReal);
#if ALL_SIMD_SSE_PLATFORMS
        sA.m_real = __sin(a.m_real);
#else
        const typename hkFloatTypes<FT>::Float sinA = hkMath::sin(a.getReal());
        sA.setFromFloat(sinA);
#endif
        typename hkFloatTypes<FT>::Scalar absSinA; absSinA.setAbs(sA);
        const typename hkFloatTypes<FT>::Comparison absSinAGreaterEqualEps = absSinA.greaterEqual(hkFloatTypes<FT>::Scalar::template getConstant<HK_QUADREAL_EPS>());

        typename hkFloatTypes<FT>::Scalar d; d.setDiv(a,sA);
        typename hkFloatTypes<FT>::Vector logV; logV.setMul(vOut, d);

        vOut.setSelect(absSinAGreaterEqualEps, logV, vOut);
    }
}

//
//  Set the quaternion as the exponential of the given (axis-angle) vector

template <typename FT>
HK_INLINE void HK_CALL hkQuaternionUtilImpl<FT>::_computeExp(typename hkFloatTypes<FT>::Vec4_ vIn, typename hkFloatTypes<FT>::Quaternion& qOut)
{
    const typename hkFloatTypes<FT>::Scalar angle = vIn.template length<3>();

    typename hkFloatTypes<FT>::Vector sc;
    hkVector4UtilImpl<FT>::sinCos(angle,sc);
    const typename hkFloatTypes<FT>::Scalar sA = sc.template getComponent<0>();

    typename hkFloatTypes<FT>::Scalar absSinA; absSinA.setAbs(sA);
    const typename hkFloatTypes<FT>::Comparison absSinAGreaterEqualEps = absSinA.greaterEqual(hkFloatTypes<FT>::Scalar::template getConstant<HK_QUADREAL_EPS>());

    typename hkFloatTypes<FT>::Scalar d; d.setDiv(sA,angle);
    typename hkFloatTypes<FT>::Vector expV; expV.setMul(vIn, d);

    typename hkFloatTypes<FT>::Vector quatDir; quatDir.setSelect(absSinAGreaterEqualEps, expV, vIn);

    qOut.m_vec.setXYZ_W(quatDir, sc.template getComponent<1>());
}

template <typename FT>
HK_INLINE void HK_CALL hkQuaternionUtilImpl<FT>::_computeExp_Approximate(typename hkFloatTypes<FT>::Vec4_ vIn, typename hkFloatTypes<FT>::Quaternion& qOut)
{
#if ALL_SIMD_SSE_PLATFORMS
    const typename hkFloatTypes<FT>::Scalar angle = vIn.template length<3>();

    typename hkFloatTypes<FT>::Vector sinCos;
    hkVector4UtilImpl<FT>::sinCosApproximation(angle,sinCos);
    typename hkFloatTypes<FT>::Scalar sA = sinCos.template getComponent<0>();

    typename hkFloatTypes<FT>::Scalar absSinA; absSinA.setAbs(sA);
    const typename hkFloatTypes<FT>::Comparison absSinAGreaterEqualEps = absSinA.greaterEqual(hkFloatTypes<FT>::Scalar::template getConstant<HK_QUADREAL_EPS>());

    typename hkFloatTypes<FT>::Scalar d; d.template setDiv<HK_ACC_12_BIT,HK_DIV_IGNORE>(sA,angle);
    typename hkFloatTypes<FT>::Vector expV; expV.setMul(vIn, d);

    typename hkFloatTypes<FT>::Vector quatDir; quatDir.setSelect(absSinAGreaterEqualEps, expV, vIn);

    qOut.m_vec.setXYZ_W(quatDir, sinCos.template getComponent<1>());
#else
    _computeExp(vIn, qOut);
#endif
}

template <typename FT>
HK_INLINE void HK_CALL hkQuaternionUtilImpl<FT>::_computeShortestRotation(typename hkFloatTypes<FT>::Vec4_ from, typename hkFloatTypes<FT>::Vec4_ to, typename hkFloatTypes<FT>::Quaternion& qOut)
{
    HK_MATH_ASSERT(0xad87ba22, from.template isNormalized<3>() && to.template isNormalized<3>(), "hkQuaternion::setShortestRotation input vectors are not normalized.");

#if (HK_CONFIG_SIMD == HK_CONFIG_SIMD_ENABLED)

    const typename hkFloatTypes<FT>::Scalar dotProd = from.template dot<3>( to ); // cos(theta)

    typename hkFloatTypes<FT>::Scalar nearlyOne; nearlyOne.setFromFloat(typename hkFloatTypes<FT>::Float(1.0f - 1e-5f));

    if( dotProd.isGreater(nearlyOne) )
    {
        qOut.setIdentity();
    }
    else
    {
        nearlyOne = -nearlyOne;

        if( dotProd.isLess(nearlyOne) )
        {
            qOut.setFlippedRotation( from );
        }
        else
        {
            // Using cos(theta) = 2*cos^2(theta/2) - 1
            const typename hkFloatTypes<FT>::Scalar oneHalf = hkFloatTypes<FT>::Scalar::template getConstant<HK_QUADREAL_INV_2>();
            typename hkFloatTypes<FT>::Scalar c; c.setAddMul(oneHalf, oneHalf, dotProd); // .5 * ( 1 + dotProd)
            const typename hkFloatTypes<FT>::Scalar sqrtInverseC = c.sqrtInverse();

            //const typename hkFloatTypes<FT>::Pod cosT2 = hkMath::sqrt( c );

            typename hkFloatTypes<FT>::Vector cross;
            cross.setCross( from, to ); // ~sin(theta)* axis -- this is not exact sin(theta) .. error around 1-e2 for angles close to 180deg

            //typename hkFloatTypes<FT>::Pod rescaleSin  = 0.5f / cosT2;
            typename hkFloatTypes<FT>::Scalar rescaleSin = oneHalf * sqrtInverseC;

            typename hkFloatTypes<FT>::Scalar somewhatNearlyNegOne; somewhatNearlyNegOne.setFromFloat(typename hkFloatTypes<FT>::Float(-(1.0f - 1e-3f)));

            if (dotProd.isLess(somewhatNearlyNegOne))
            {
                // Extra correction needed for angles around 180deg
                //
                //const typename hkFloatTypes<FT>::Pod sinT2 = hkMath::sqrt( cosT2 * cosT2 - dotProd );
                const typename hkFloatTypes<FT>::Scalar sinT4 = c - dotProd;
                const typename hkFloatTypes<FT>::Scalar invApproxSinT = cross.template lengthInverse<3>(); // this has errors around 1-e2 for angles around 180 deg.
                //const typename hkFloatTypes<FT>::Pod sinT = 2 * sinT2 * cosT2;
                //rescaleSin *= (sinT / approxSinT);

                //rescaleSin = sinT2 / approxSinT;
                rescaleSin = sinT4 * sinT4.sqrtInverse() * invApproxSinT;
            }

            // Using sin(theta) = 2*cos(theta/2)*sin(theta/2)
            cross.mul( rescaleSin );
            qOut.m_vec.setXYZ_W( cross, c * sqrtInverseC ); // = sqrt(c)
        }
    }

#else

    const typename hkFloatTypes<FT>::Float dotProd = from.template dot<3>( to ).getReal(); // cos(theta)
    const typename hkFloatTypes<FT>::Float nearlyOne = typename hkFloatTypes<FT>::Float(1.0f - 1e-5f);
    const typename hkFloatTypes<FT>::Float somewhatNearlyNegativeOne = typename hkFloatTypes<FT>::Float(-(1.0f - 1e-3f));
    if( dotProd > nearlyOne )
    {
        qOut.setIdentity();
    }
    else if( dotProd < -nearlyOne )
    {
        qOut.setFlippedRotation( from );
    }
    else
    {
        // Using cos(theta) = 2*cos^2(theta/2) - 1
        const typename hkFloatTypes<FT>::Float c = (dotProd + typename hkFloatTypes<FT>::Float(1)) * typename hkFloatTypes<FT>::Float(0.5f);
        const typename hkFloatTypes<FT>::Float cosT2 = hkMath::sqrt( c );

        typename hkFloatTypes<FT>::Vector cross;
        cross.setCross( from, to ); // ~sin(theta)* axis -- this is not exact sin(theta) .. error around 1-e2 for angles close to 180deg

        typename hkFloatTypes<FT>::Float rescaleSin  = typename hkFloatTypes<FT>::Float(0.5f) / cosT2;

        if (dotProd < somewhatNearlyNegativeOne)
        {
            // Extra correction needed for angles around 180deg
            //
            const typename hkFloatTypes<FT>::Float sinT2 = hkMath::sqrt( cosT2 * cosT2 - dotProd );
            const typename hkFloatTypes<FT>::Float approxSinT = cross.template length<3>().getReal(); // this has errors around 1-e2 for angles around 180 deg.
            const typename hkFloatTypes<FT>::Float sinT = typename hkFloatTypes<FT>::Float(2) * sinT2 * cosT2;
            rescaleSin *= (sinT / approxSinT);
        }

        // Using sin(theta) = 2*cos(theta/2)*sin(theta/2)
        typename hkFloatTypes<FT>::Scalar rescaleSinV; rescaleSinV.setFromFloat(rescaleSin);
        cross.mul( rescaleSinV );
        cross(3) = cosT2;
        qOut.m_vec = cross;
    }

#endif

    HK_MATH_ASSERT(0xad78999d, qOut.isOk(), "Resulting quaternion is not normalized.");
}

template <typename FT>
HK_INLINE void HK_CALL hkQuaternionUtilImpl<FT>::_computeShortestRotationDamped(typename hkFloatTypes<FT>::Vec4_ from, typename hkFloatTypes<FT>::Vec4_ to, typename hkFloatTypes<FT>::Scalar_ gain, typename hkFloatTypes<FT>::Quaternion& qOut)
{
    const typename hkFloatTypes<FT>::Scalar one =  hkFloatTypes<FT>::Scalar::template getConstant<HK_QUADREAL_1>();
    const typename hkFloatTypes<FT>::Scalar half = hkFloatTypes<FT>::Scalar::template getConstant<HK_QUADREAL_INV_2>();

    const typename hkFloatTypes<FT>::Scalar dotProd = from.template dot<3>( to ); // cos(theta)
    const typename hkFloatTypes<FT>::Scalar dampedDot = one - gain + gain * dotProd;
    typename hkFloatTypes<FT>::Scalar nearlyOne; nearlyOne.setFromFloat(typename hkFloatTypes<FT>::Float(1.0f - 1e-6f));

    const typename hkFloatTypes<FT>::Scalar c = (dampedDot + one) * half;
    if ( c.isLessEqualZero() | dotProd.isLess(-nearlyOne) )
    {
        qOut.setFlippedRotation( from );
    }
    else
    {
        if( dotProd.isGreater(nearlyOne) )
        {
            qOut.setIdentity();
        }
        else
        {
            const typename hkFloatTypes<FT>::Scalar cosT2 = c.template sqrt<HK_ACC_MID,HK_SQRT_IGNORE>();

            typename hkFloatTypes<FT>::Vector cross; cross.setCross( from, to ); // sin(theta)* axis

            // Using sin(theta) = 2*cos(theta/2)*sin(theta/2)
            typename hkFloatTypes<FT>::Scalar rescaleSin; rescaleSin.setDiv(gain * half, cosT2);
            cross.mul( rescaleSin );
            cross.template setComponent<3>(cosT2);

            // renormalize for gain.
            cross.template normalize<4>();
            qOut.m_vec = cross;
        }
    }

    HK_MATH_ASSERT(0xad78999e, qOut.isOk(), "Resulting quaternion is not normalized.");
}

//
//  Computes a clamped quaternion such that the rotation angle is less than the given value.
//      q0 = [u * sin(a/2), cos(a/2)]

template <typename FT>
HK_INLINE hkBool32 HK_CALL hkQuaternionUtilImpl<FT>::_computeClampedRotation(typename hkFloatTypes<FT>::Quaternion_ q0, typename hkFloatTypes<FT>::Scalar_ cosMaxAngle, typename hkFloatTypes<FT>::Quaternion& qClamped)
{
    // Make sure the rotation angle is between [0, 180] degrees
    typename hkFloatTypes<FT>::Quaternion q;
    q.setIdentity();        // [0, 0, 0, 1]
    q.setClosest(q0, q);    // +/-[u * sin(a/2), cos(a/2)] such that cos(a/2) is positive

    // Extract rotation axis
    typename hkFloatTypes<FT>::Vector u;
    const typename hkFloatTypes<FT>::Vector vRotationAxis   = q.getImag();                          // u * sin(a/2)
    const typename hkFloatTypes<FT>::Scalar sinHalfSq       = vRotationAxis.template lengthSquared<3>();        // = sin(a/2) * (sin(a/2)
    const typename hkFloatTypes<FT>::Scalar invSinHalf      = sinHalfSq.sqrtInverse();              // = 1 / sin(a/2)
    u.setMul(invSinHalf, vRotationAxis);
    u.setSelect(sinHalfSq.greater(hkFloatTypes<FT>::Scalar::template getConstant<HK_QUADREAL_EPS_SQRD>()), u, hkFloatTypes<FT>::Vector::template getConstant<HK_QUADREAL_1000>());

    // Compute cos(a) = cos(a/2) * cos(a/2) - sin(a/2) * (sin(a/2) = 1 - 2 * sin(a/2) * (sin(a/2)
    // We want cos(a) >= cos(aMax), with 0 <= aMax  <= PI
    const typename hkFloatTypes<FT>::Scalar cosA            = hkFloatTypes<FT>::Scalar::template getConstant<HK_QUADREAL_1>() - (sinHalfSq + sinHalfSq);

    // Compute the clamped quaternion
    typename hkFloatTypes<FT>::Vector v;
    v.setAll(cosMaxAngle);                              // [cos(b), cos(b), cos(b), cos(b)]
    v.template setNeg<3>(v);                                        // [-cos(b), -cos(b), -cos(b), cos(b)]
    v.add(hkFloatTypes<FT>::Vector::template getConstant<HK_QUADREAL_1>());     // [1 - cos(b), 1 - cos(b), 1 - cos(b), 1 + cos(b)]
    v.mul(hkFloatTypes<FT>::Scalar::template getConstant<HK_QUADREAL_INV_2>());                             // [sin(b/2), sin(b/2), sin(b/2), cos(b/2)]^2
    v.setSqrt(v);                                       // [sin(b/2), sin(b/2), sin(b/2), cos(b/2)]
    u.template setComponent<3>(hkFloatTypes<FT>::Scalar::template getConstant<HK_QUADREAL_1>());                    // [u, 1]
    u.mul(v);                                           // [u * sin(b/2), cos(b/2)]
    const typename hkFloatTypes<FT>::Comparison cmp = cosA.less(cosMaxAngle);
    qClamped.m_vec.setSelect(cmp, u, q0.m_vec);
    return cmp.anyIsSet();
}

//
//  Increments (integrates) the given quaternion with the given axis-angle delta

template <typename FT>
HK_INLINE void HK_CALL hkQuaternionUtilImpl<FT>::_increment(typename hkFloatTypes<FT>::Quaternion_ qSrc, typename hkFloatTypes<FT>::Vec4_ axisAngleDelta, typename hkFloatTypes<FT>::Quaternion& qDst)
{
    typename hkFloatTypes<FT>::Vector halfAxisAngle;    halfAxisAngle.setMul(hkFloatTypes<FT>::Scalar::template getConstant<HK_QUADREAL_INV_2>(), axisAngleDelta);
    typename hkFloatTypes<FT>::Quaternion dq;           dq.m_vec.setXYZ_W(halfAxisAngle, hkFloatTypes<FT>::Scalar::template getConstant<HK_QUADREAL_1>());          // [angle * axis / 2, 1]
    typename hkFloatTypes<FT>::Quaternion q;                q.setMul(dq, qSrc);
                                                        q.normalize();
    qDst = q;
}

//
//  Computes the angular velocity that rotates qSrc to qDst

template <typename FT>
HK_INLINE void HK_CALL hkQuaternionUtilImpl<FT>::_computeAngularVelocity(typename hkFloatTypes<FT>::Quaternion_ qSrc, typename hkFloatTypes<FT>::Quaternion_ qDst, typename hkFloatTypes<FT>::Scalar_ invDt, typename hkFloatTypes<FT>::Vector& wOut)
{
    typedef typename hkFloatTypes<FT>::Quaternion   Quat;
    typedef typename hkFloatTypes<FT>::Vector       Vec;

    // Make sure src is in the same hemisphere as dst otherwise we may calculate the velocity for the longest rotation
    // between the two
    Quat qSrcClosest; qSrcClosest.setClosest(qSrc, qDst);

    // qDst = Normalize[dq * qSrc]
    Quat dq;    dq.setMulInverse(qDst, qSrcClosest);
                dq.template normalize<HK_ACC_FULL, HK_SQRT_SET_ZERO>();

    // Compute w = AxisAngle[dq] = 2 * Log[dq] = AxisAngle[qDst * Inverse[qSrc]]
    Vec w;      hkQuaternionUtilImpl<FT>::_computeLog(dq, w);
                w.add(w);
    wOut.setMul(invDt, w);
}

//
//  Computes the maximum angular velocity that can be applied to quaternion qA along rotation axis dw (ASSUMED normalized) that will keep
//  the relative angle between qA and qB under the given threshold.

template <typename FT>
HK_INLINE const typename hkFloatTypes<FT>::Scalar HK_CALL hkQuaternionUtilImpl<FT>::_computeMaximumAllowedVelocity(typename hkFloatTypes<FT>::Quaternion_ qA, typename hkFloatTypes<FT>::Quaternion_ qB, typename hkFloatTypes<FT>::Vec4_ rotAxis, typename hkFloatTypes<FT>::Scalar_ cosMax)
{
    // We want to limit the relative rotation between qA and qB
    // Let pAB = Inverse[qB] * qA.
    //
    // A displacement of qA can be expressed w.r.t. the rotation angle a as:
    //      qA[a] = Quaternion[u * a / 2, 1] * qA
    //      pAB[a]  = Normalize[Inverse[qB] * qW * qA], and moving to quaternion operator notation
    //              = Normalize[ Transpose[Post[qB]] * Pre[qA] * Quaternion[u * a / 2, 1] ]
    //
    // Let M = Transpose[Post[qB]] * Pre[qA], a 4x4 matrix
    //      pAB[a] = {x[a], y[a], z[a], w[a]} = {v * sin(b/2), cos(b/2)} and we want to limit cos(b) >= cosMax
    //      cos(b) = cos(b/2)^2 - sin(b/2)^2 = (w^2 - (x^2 + y^2 + z^2)) / (x^2 + y^2 + z^2 + w^2) >= cosMax
    //
    // Therefore, we have to solve the equation for a:
    //      (1 + cosMax) * (x^2 + y^2 + z^2) + (cosMax - 1) * w^2 <= 0
    //
    //  Let M = {M00, M01, M10, M11}, where M00 is 3x3, M01 = 3x1, M10 = 1x3, M11 = 1x1.
    //      pAB = {x, y, z, w}  = {(M00 * u/2) * a + M01, (M10 * u/2) * a + M11}
    //      (x^2 + y^2 + z^2)   = (M00 * u/2)^2 * a^2 + 2 * (M00 * u/2 * M01) * a + M01^2
    //      w^2                 = (M10 * u/2)^2 * a^2 + 2 * (M10 * u/2 * M11) * a + M11^2

    // Compute matrix M based on connection pivots
    typename hkFloatTypes<FT>::Matrix4 M;
    {
        typename hkFloatTypes<FT>::Matrix4 preA;        _computePreMultiplyOperator(qA, preA);
        typename hkFloatTypes<FT>::Matrix4 postB;   _computePostMultiplyOperator(qB, postB);
        {
            typename hkFloatTypes<FT>::Vector c0;   hkVector4UtilImpl<FT>::dot4_1vs4(preA.template getColumn<0>(), postB.template getColumn<0>(), postB.template getColumn<1>(), postB.template getColumn<2>(), postB.template getColumn<3>(), c0);
            typename hkFloatTypes<FT>::Vector c1;   hkVector4UtilImpl<FT>::dot4_1vs4(preA.template getColumn<1>(), postB.template getColumn<0>(), postB.template getColumn<1>(), postB.template getColumn<2>(), postB.template getColumn<3>(), c1);
            typename hkFloatTypes<FT>::Vector c2;   hkVector4UtilImpl<FT>::dot4_1vs4(preA.template getColumn<2>(), postB.template getColumn<0>(), postB.template getColumn<1>(), postB.template getColumn<2>(), postB.template getColumn<3>(), c2);
            typename hkFloatTypes<FT>::Vector c3;   hkVector4UtilImpl<FT>::dot4_1vs4(preA.template getColumn<3>(), postB.template getColumn<0>(), postB.template getColumn<1>(), postB.template getColumn<2>(), postB.template getColumn<3>(), c3);

            M.setCols(c0, c1, c2, c3);
        }
    }

    // Compute equation coeffs, i.e. c0 * a^2 + 2 * c1 * a + c0 = 0
    typename hkFloatTypes<FT>::Scalar c0, c1, c2;
    {
        typename hkFloatTypes<FT>::Vector halfU;        halfU.setMul(hkFloatTypes<FT>::Scalar::template getConstant<HK_QUADREAL_INV_2>(), rotAxis);
        typename hkFloatTypes<FT>::Vector M00_u;        M.transformDirection(halfU, M00_u);     // (M00 * u/2)
        const typename hkFloatTypes<FT>::Vector M01     = M.template getColumn<3>();                        // M01
        const typename hkFloatTypes<FT>::Scalar M11 = M01.template getComponent<3>();               // M11
        const typename hkFloatTypes<FT>::Scalar M10_u   = M00_u.template getComponent<3>();             // (M10 * u/2), this results as a by-product of transformDirection

        // (x^2 + y^2 + z^2) = xyz_c2 * a^2 + 2 * xyz_c1 * a + xyz_c0
        typename hkFloatTypes<FT>::Scalar cos_p1;       cos_p1.setAdd(hkFloatTypes<FT>::Scalar::template getConstant<HK_QUADREAL_1>(), cosMax);             // (1 + cosMax)
        typename hkFloatTypes<FT>::Scalar xyz_c2;       xyz_c2.setMul(cos_p1, M00_u.template lengthSquared<3>());   // (1 + cosMax) * (M00 * u/2)^2
        typename hkFloatTypes<FT>::Scalar xyz_c1;       xyz_c1.setMul(cos_p1, M00_u.template dot<3>(M01));          // (1 + cosMax) * (M00 * u/2) * M01
        typename hkFloatTypes<FT>::Scalar xyz_c0;       xyz_c0.setMul(cos_p1, M01.template lengthSquared<3>());     // (1 + cosMax) * M01^2

        // w^2 = w_c2 * a^2 + 2 * w_c1 * a + w_c0
        typename hkFloatTypes<FT>::Scalar w_c2;     w_c2.setMul(M10_u, M10_u);              // (M10 * u/2)^2
        typename hkFloatTypes<FT>::Scalar w_c1;     w_c1.setMul(M10_u, M11);                // (M10 * u/2 * M11)
        typename hkFloatTypes<FT>::Scalar w_c0;     w_c0.setMul(M11, M11);                  // M11^2

        typename hkFloatTypes<FT>::Scalar cos_m1;       cos_m1.setSub(cosMax, hkFloatTypes<FT>::Scalar::template getConstant<HK_QUADREAL_1>()); // (cosMax - 1)
        c2.setAddMul(xyz_c2, cos_m1, w_c2);     // (1 + cosMax) * (M00 * u/2)^2     + (cosMax - 1) * (M10 * u/2)^2
        c1.setAddMul(xyz_c1, cos_m1, w_c1);     // (1 + cosMax) * (M00 * u/2) * M01 + (cosMax - 1) * (M10 * u/2 * M11)
        c0.setAddMul(xyz_c0, cos_m1, w_c0);     // (1 + cosMax) * M01^2             + (cosMax - 1) * M11^2
    }

    // Solve equation for a
    typename hkFloatTypes<FT>::Scalar absC2;    absC2.setAbs(c2);
    if ( absC2.isGreaterEqual(hkFloatTypes<FT>::Scalar::template getConstant<HK_QUADREAL_EPS>()) )
    {
        // Quadratic
        typename hkFloatTypes<FT>::Scalar DSq;          DSq.setMul(c1, c1);         DSq.subMul(c2, c0);
        typename hkFloatTypes<FT>::Scalar D;            D.setAbs(DSq);              D = D.sqrt();
        const typename hkFloatTypes<FT>::Scalar invC2   = -c2.reciprocal();
        typename hkFloatTypes<FT>::Scalar a1;           a1.setAdd(c1, D);           a1.mul(invC2);  // -(c1 + D) / c0
        typename hkFloatTypes<FT>::Scalar a2;           a2.setSub(c1, D);           a2.mul(invC2);  // -(c1 - D) / c0
        typename hkFloatTypes<FT>::Scalar a12;          a12.setMax(a1, a2);

        // Back-up. If the (discriminant < 0 or a12 < 0) then: if c2 > 0, the function will always be positive for positive a, so limit is 0.
        // If c2 < 0, the function will always be negative for positive a, so no limit!
        {
            typename hkFloatTypes<FT>::Scalar aBackup;              aBackup.setZero();
            aBackup.setSelect(c2.lessZero(), hkFloatTypes<FT>::Scalar::template getConstant<HK_QUADREAL_MAX>(), aBackup);
            typename hkFloatTypes<FT>::Comparison cmpDegen; cmpDegen.setOr(DSq.lessZero(), a12.lessZero());
            a12.setSelect(cmpDegen, aBackup, a12);
        }

        return a12;
    }

    typename hkFloatTypes<FT>::Scalar absC1;    absC1.setAbs(c1);
    if ( absC1.isGreaterEqual(hkFloatTypes<FT>::Scalar::template getConstant<HK_QUADREAL_EPS>()) )
    {
        // Linear, i.e. 2 * c1 * a + c0 = 0
        //      a = -c0 / (2 c1)
        typename hkFloatTypes<FT>::Scalar a1;           a1.setDiv(-hkFloatTypes<FT>::Scalar::template getConstant<HK_QUADREAL_INV_2>() * c0, c1);
        typename hkFloatTypes<FT>::Scalar aBackup;      aBackup.setZero();
        aBackup.setSelect(c1.lessZero(), hkFloatTypes<FT>::Scalar::template getConstant<HK_QUADREAL_MAX>(), aBackup);
        a1.setSelect(a1.lessZero(), aBackup, a1);
        return a1;
    }

    {
        // Else, either 0 or no limit, based on the sign of c0
        typename hkFloatTypes<FT>::Scalar a0;       a0.setZero();
        a0.setSelect(c0.lessEqualZero(), hkFloatTypes<FT>::Scalar::template getConstant<HK_QUADREAL_MAX>(), a0);
        return a0;
    }
}

//
//  Computes the cosine of the rotation angle

template <typename FT>
HK_INLINE const typename hkFloatTypes<FT>::Scalar HK_CALL hkQuaternionUtilImpl<FT>::_computeCosRotationAngle(typename hkFloatTypes<FT>::Quaternion_ q)
{
    // q = len[q] * [v * sin(a/2), cos(a/2)] = [x, y, z, w]
    // cos(a/2)^2 = w^2 / (x^2 + y^2 + z^2 + w^2)
    // sin(a/2)^2 = (x^2 + y^2 + z^2) / (x^2 + y^2 + z^2 + w^2)
    // cos(a) = cos(a/2)^2 - sin(a/2)^2 = (w^2 - x^2 - y^2 - z^2)  / (x^2 + y^2 + z^2 + w^2)
    const typename hkFloatTypes<FT>::Scalar sinSq   = q.getImag().template lengthSquared<3>();      // (x^2 + y^2 + z^2)
    typename hkFloatTypes<FT>::Scalar cosSq;        cosSq.setMul(q.getRealPart(), q.getRealPart());     // (w^2)
    typename hkFloatTypes<FT>::Scalar lenSq;        lenSq.setAdd(sinSq, cosSq);                         // (x^2 + y^2 + z^2 + w^2)
    typename hkFloatTypes<FT>::Scalar cosA;     cosA.setSub(cosSq, sinSq);                          // (w^2 - w^2 - x^2 - y^2 - z^2)
                                                cosA.div(lenSq);                                    // (w^2 - w^2 - x^2 - y^2 - z^2) / (x^2 + y^2 + z^2 + w^2)
    return cosA;
}

//
//  Returns true if the given quaternions are almost equal

template <typename FT>
HK_INLINE hkBool32 hkQuaternionUtilImpl<FT>::_areApproximatelyEqual(typename hkFloatTypes<FT>::Quaternion_ qA, typename hkFloatTypes<FT>::Quaternion_ qB, typename hkFloatTypes<FT>::Scalar_ epsilon)
{
    typename hkFloatTypes<FT>::Vector d;
    typename hkFloatTypes<FT>::Vector e;
    d.setSub(qA.m_vec, qB.m_vec);
    d.setAbs(d);
    e.setAll(epsilon);
    return d.lessEqual(e).allAreSet();
}

#undef ALL_SIMD_SSE_PLATFORMS

/*
 * Havok SDK - Base file, BUILD(#20180110)
 * 
 * Confidential Information of Microsoft Corporation.
 * Not for disclosure or distribution without Microsoft's prior written
 * consent.  This software contains code, techniques and know-how which
 * is confidential and proprietary to Microsoft.  Product and Trade Secret
 * source code contains trade secrets of Microsoft.  Havok Software (C)
 * Copyright 1999-2018 Microsoft Corporation.
 * All Rights Reserved. Use of this software is subject to the
 * terms of an end user license agreement.
 * 
 * The Havok Logo, and the Havok buzzsaw logo are trademarks of Microsoft.
 * Title, ownership rights, and intellectual property rights in the Havok
 * software remain in Microsoft and/or its suppliers.
 * 
 * Use of this software for evaluation purposes is subject to and
 * indicates acceptance of the End User licence Agreement for this
 * product. A copy of the license is included with this software and is
 * also available from Havok Support.
 * 
 */
